qemu/hw/virtio/vhost-user.c
<<
>>
Prefs
   1/*
   2 * vhost-user
   3 *
   4 * Copyright (c) 2013 Virtual Open Systems Sarl.
   5 *
   6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
   7 * See the COPYING file in the top-level directory.
   8 *
   9 */
  10
  11#include "qemu/osdep.h"
  12#include "qapi/error.h"
  13#include "hw/virtio/vhost.h"
  14#include "hw/virtio/vhost-backend.h"
  15#include "hw/virtio/virtio-net.h"
  16#include "chardev/char-fe.h"
  17#include "sysemu/kvm.h"
  18#include "qemu/error-report.h"
  19#include "qemu/sockets.h"
  20#include "sysemu/cryptodev.h"
  21#include "migration/migration.h"
  22#include "migration/postcopy-ram.h"
  23#include "trace.h"
  24
  25#include <sys/ioctl.h>
  26#include <sys/socket.h>
  27#include <sys/un.h>
  28#include <linux/vhost.h>
  29#include <linux/userfaultfd.h>
  30
  31#define VHOST_MEMORY_MAX_NREGIONS    8
  32#define VHOST_USER_F_PROTOCOL_FEATURES 30
  33
  34/*
  35 * Maximum size of virtio device config space
  36 */
  37#define VHOST_USER_MAX_CONFIG_SIZE 256
  38
  39enum VhostUserProtocolFeature {
  40    VHOST_USER_PROTOCOL_F_MQ = 0,
  41    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
  42    VHOST_USER_PROTOCOL_F_RARP = 2,
  43    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
  44    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
  45    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
  46    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
  47    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
  48    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
  49    VHOST_USER_PROTOCOL_F_CONFIG = 9,
  50    VHOST_USER_PROTOCOL_F_MAX
  51};
  52
  53#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
  54
  55typedef enum VhostUserRequest {
  56    VHOST_USER_NONE = 0,
  57    VHOST_USER_GET_FEATURES = 1,
  58    VHOST_USER_SET_FEATURES = 2,
  59    VHOST_USER_SET_OWNER = 3,
  60    VHOST_USER_RESET_OWNER = 4,
  61    VHOST_USER_SET_MEM_TABLE = 5,
  62    VHOST_USER_SET_LOG_BASE = 6,
  63    VHOST_USER_SET_LOG_FD = 7,
  64    VHOST_USER_SET_VRING_NUM = 8,
  65    VHOST_USER_SET_VRING_ADDR = 9,
  66    VHOST_USER_SET_VRING_BASE = 10,
  67    VHOST_USER_GET_VRING_BASE = 11,
  68    VHOST_USER_SET_VRING_KICK = 12,
  69    VHOST_USER_SET_VRING_CALL = 13,
  70    VHOST_USER_SET_VRING_ERR = 14,
  71    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
  72    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
  73    VHOST_USER_GET_QUEUE_NUM = 17,
  74    VHOST_USER_SET_VRING_ENABLE = 18,
  75    VHOST_USER_SEND_RARP = 19,
  76    VHOST_USER_NET_SET_MTU = 20,
  77    VHOST_USER_SET_SLAVE_REQ_FD = 21,
  78    VHOST_USER_IOTLB_MSG = 22,
  79    VHOST_USER_SET_VRING_ENDIAN = 23,
  80    VHOST_USER_GET_CONFIG = 24,
  81    VHOST_USER_SET_CONFIG = 25,
  82    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
  83    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
  84    VHOST_USER_POSTCOPY_ADVISE  = 28,
  85    VHOST_USER_POSTCOPY_LISTEN  = 29,
  86    VHOST_USER_POSTCOPY_END     = 30,
  87    VHOST_USER_MAX
  88} VhostUserRequest;
  89
  90typedef enum VhostUserSlaveRequest {
  91    VHOST_USER_SLAVE_NONE = 0,
  92    VHOST_USER_SLAVE_IOTLB_MSG = 1,
  93    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
  94    VHOST_USER_SLAVE_MAX
  95}  VhostUserSlaveRequest;
  96
  97typedef struct VhostUserMemoryRegion {
  98    uint64_t guest_phys_addr;
  99    uint64_t memory_size;
 100    uint64_t userspace_addr;
 101    uint64_t mmap_offset;
 102} VhostUserMemoryRegion;
 103
 104typedef struct VhostUserMemory {
 105    uint32_t nregions;
 106    uint32_t padding;
 107    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
 108} VhostUserMemory;
 109
 110typedef struct VhostUserLog {
 111    uint64_t mmap_size;
 112    uint64_t mmap_offset;
 113} VhostUserLog;
 114
 115typedef struct VhostUserConfig {
 116    uint32_t offset;
 117    uint32_t size;
 118    uint32_t flags;
 119    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
 120} VhostUserConfig;
 121
 122#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
 123#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64
 124
 125typedef struct VhostUserCryptoSession {
 126    /* session id for success, -1 on errors */
 127    int64_t session_id;
 128    CryptoDevBackendSymSessionInfo session_setup_data;
 129    uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
 130    uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
 131} VhostUserCryptoSession;
 132
 133static VhostUserConfig c __attribute__ ((unused));
 134#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
 135                                   + sizeof(c.size) \
 136                                   + sizeof(c.flags))
 137
 138typedef struct {
 139    VhostUserRequest request;
 140
 141#define VHOST_USER_VERSION_MASK     (0x3)
 142#define VHOST_USER_REPLY_MASK       (0x1<<2)
 143#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
 144    uint32_t flags;
 145    uint32_t size; /* the following payload size */
 146} QEMU_PACKED VhostUserHeader;
 147
 148typedef union {
 149#define VHOST_USER_VRING_IDX_MASK   (0xff)
 150#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
 151        uint64_t u64;
 152        struct vhost_vring_state state;
 153        struct vhost_vring_addr addr;
 154        VhostUserMemory memory;
 155        VhostUserLog log;
 156        struct vhost_iotlb_msg iotlb;
 157        VhostUserConfig config;
 158        VhostUserCryptoSession session;
 159} VhostUserPayload;
 160
 161typedef struct VhostUserMsg {
 162    VhostUserHeader hdr;
 163    VhostUserPayload payload;
 164} QEMU_PACKED VhostUserMsg;
 165
 166static VhostUserMsg m __attribute__ ((unused));
 167#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
 168
 169#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
 170
 171/* The version of the protocol we support */
 172#define VHOST_USER_VERSION    (0x1)
 173
 174struct vhost_user {
 175    struct vhost_dev *dev;
 176    CharBackend *chr;
 177    int slave_fd;
 178    NotifierWithReturn postcopy_notifier;
 179    struct PostCopyFD  postcopy_fd;
 180    uint64_t           postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS];
 181    /* Length of the region_rb and region_rb_offset arrays */
 182    size_t             region_rb_len;
 183    /* RAMBlock associated with a given region */
 184    RAMBlock         **region_rb;
 185    /* The offset from the start of the RAMBlock to the start of the
 186     * vhost region.
 187     */
 188    ram_addr_t        *region_rb_offset;
 189
 190    /* True once we've entered postcopy_listen */
 191    bool               postcopy_listen;
 192};
 193
 194static bool ioeventfd_enabled(void)
 195{
 196    return kvm_enabled() && kvm_eventfds_enabled();
 197}
 198
 199static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
 200{
 201    struct vhost_user *u = dev->opaque;
 202    CharBackend *chr = u->chr;
 203    uint8_t *p = (uint8_t *) msg;
 204    int r, size = VHOST_USER_HDR_SIZE;
 205
 206    r = qemu_chr_fe_read_all(chr, p, size);
 207    if (r != size) {
 208        error_report("Failed to read msg header. Read %d instead of %d."
 209                     " Original request %d.", r, size, msg->hdr.request);
 210        goto fail;
 211    }
 212
 213    /* validate received flags */
 214    if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
 215        error_report("Failed to read msg header."
 216                " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
 217                VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
 218        goto fail;
 219    }
 220
 221    /* validate message size is sane */
 222    if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
 223        error_report("Failed to read msg header."
 224                " Size %d exceeds the maximum %zu.", msg->hdr.size,
 225                VHOST_USER_PAYLOAD_SIZE);
 226        goto fail;
 227    }
 228
 229    if (msg->hdr.size) {
 230        p += VHOST_USER_HDR_SIZE;
 231        size = msg->hdr.size;
 232        r = qemu_chr_fe_read_all(chr, p, size);
 233        if (r != size) {
 234            error_report("Failed to read msg payload."
 235                         " Read %d instead of %d.", r, msg->hdr.size);
 236            goto fail;
 237        }
 238    }
 239
 240    return 0;
 241
 242fail:
 243    return -1;
 244}
 245
 246static int process_message_reply(struct vhost_dev *dev,
 247                                 const VhostUserMsg *msg)
 248{
 249    VhostUserMsg msg_reply;
 250
 251    if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
 252        return 0;
 253    }
 254
 255    if (vhost_user_read(dev, &msg_reply) < 0) {
 256        return -1;
 257    }
 258
 259    if (msg_reply.hdr.request != msg->hdr.request) {
 260        error_report("Received unexpected msg type."
 261                     "Expected %d received %d",
 262                     msg->hdr.request, msg_reply.hdr.request);
 263        return -1;
 264    }
 265
 266    return msg_reply.payload.u64 ? -1 : 0;
 267}
 268
 269static bool vhost_user_one_time_request(VhostUserRequest request)
 270{
 271    switch (request) {
 272    case VHOST_USER_SET_OWNER:
 273    case VHOST_USER_RESET_OWNER:
 274    case VHOST_USER_SET_MEM_TABLE:
 275    case VHOST_USER_GET_QUEUE_NUM:
 276    case VHOST_USER_NET_SET_MTU:
 277        return true;
 278    default:
 279        return false;
 280    }
 281}
 282
 283/* most non-init callers ignore the error */
 284static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
 285                            int *fds, int fd_num)
 286{
 287    struct vhost_user *u = dev->opaque;
 288    CharBackend *chr = u->chr;
 289    int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
 290
 291    /*
 292     * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
 293     * we just need send it once in the first time. For later such
 294     * request, we just ignore it.
 295     */
 296    if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
 297        msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
 298        return 0;
 299    }
 300
 301    if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
 302        error_report("Failed to set msg fds.");
 303        return -1;
 304    }
 305
 306    ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
 307    if (ret != size) {
 308        error_report("Failed to write msg."
 309                     " Wrote %d instead of %d.", ret, size);
 310        return -1;
 311    }
 312
 313    return 0;
 314}
 315
 316static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
 317                                   struct vhost_log *log)
 318{
 319    int fds[VHOST_MEMORY_MAX_NREGIONS];
 320    size_t fd_num = 0;
 321    bool shmfd = virtio_has_feature(dev->protocol_features,
 322                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
 323    VhostUserMsg msg = {
 324        .hdr.request = VHOST_USER_SET_LOG_BASE,
 325        .hdr.flags = VHOST_USER_VERSION,
 326        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
 327        .payload.log.mmap_offset = 0,
 328        .hdr.size = sizeof(msg.payload.log),
 329    };
 330
 331    if (shmfd && log->fd != -1) {
 332        fds[fd_num++] = log->fd;
 333    }
 334
 335    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
 336        return -1;
 337    }
 338
 339    if (shmfd) {
 340        msg.hdr.size = 0;
 341        if (vhost_user_read(dev, &msg) < 0) {
 342            return -1;
 343        }
 344
 345        if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
 346            error_report("Received unexpected msg type. "
 347                         "Expected %d received %d",
 348                         VHOST_USER_SET_LOG_BASE, msg.hdr.request);
 349            return -1;
 350        }
 351    }
 352
 353    return 0;
 354}
 355
 356static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
 357                                             struct vhost_memory *mem)
 358{
 359    struct vhost_user *u = dev->opaque;
 360    int fds[VHOST_MEMORY_MAX_NREGIONS];
 361    int i, fd;
 362    size_t fd_num = 0;
 363    bool reply_supported = virtio_has_feature(dev->protocol_features,
 364                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
 365    VhostUserMsg msg_reply;
 366    int region_i, msg_i;
 367
 368    VhostUserMsg msg = {
 369        .hdr.request = VHOST_USER_SET_MEM_TABLE,
 370        .hdr.flags = VHOST_USER_VERSION,
 371    };
 372
 373    if (reply_supported) {
 374        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
 375    }
 376
 377    if (u->region_rb_len < dev->mem->nregions) {
 378        u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
 379        u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
 380                                      dev->mem->nregions);
 381        memset(&(u->region_rb[u->region_rb_len]), '\0',
 382               sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
 383        memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
 384               sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
 385        u->region_rb_len = dev->mem->nregions;
 386    }
 387
 388    for (i = 0; i < dev->mem->nregions; ++i) {
 389        struct vhost_memory_region *reg = dev->mem->regions + i;
 390        ram_addr_t offset;
 391        MemoryRegion *mr;
 392
 393        assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
 394        mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
 395                                     &offset);
 396        fd = memory_region_get_fd(mr);
 397        if (fd > 0) {
 398            trace_vhost_user_set_mem_table_withfd(fd_num, mr->name,
 399                                                  reg->memory_size,
 400                                                  reg->guest_phys_addr,
 401                                                  reg->userspace_addr, offset);
 402            u->region_rb_offset[i] = offset;
 403            u->region_rb[i] = mr->ram_block;
 404            msg.payload.memory.regions[fd_num].userspace_addr =
 405                reg->userspace_addr;
 406            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
 407            msg.payload.memory.regions[fd_num].guest_phys_addr =
 408                reg->guest_phys_addr;
 409            msg.payload.memory.regions[fd_num].mmap_offset = offset;
 410            assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
 411            fds[fd_num++] = fd;
 412        } else {
 413            u->region_rb_offset[i] = 0;
 414            u->region_rb[i] = NULL;
 415        }
 416    }
 417
 418    msg.payload.memory.nregions = fd_num;
 419
 420    if (!fd_num) {
 421        error_report("Failed initializing vhost-user memory map, "
 422                     "consider using -object memory-backend-file share=on");
 423        return -1;
 424    }
 425
 426    msg.hdr.size = sizeof(msg.payload.memory.nregions);
 427    msg.hdr.size += sizeof(msg.payload.memory.padding);
 428    msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
 429
 430    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
 431        return -1;
 432    }
 433
 434    if (vhost_user_read(dev, &msg_reply) < 0) {
 435        return -1;
 436    }
 437
 438    if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
 439        error_report("%s: Received unexpected msg type."
 440                     "Expected %d received %d", __func__,
 441                     VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
 442        return -1;
 443    }
 444    /* We're using the same structure, just reusing one of the
 445     * fields, so it should be the same size.
 446     */
 447    if (msg_reply.hdr.size != msg.hdr.size) {
 448        error_report("%s: Unexpected size for postcopy reply "
 449                     "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size);
 450        return -1;
 451    }
 452
 453    memset(u->postcopy_client_bases, 0,
 454           sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);
 455
 456    /* They're in the same order as the regions that were sent
 457     * but some of the regions were skipped (above) if they
 458     * didn't have fd's
 459    */
 460    for (msg_i = 0, region_i = 0;
 461         region_i < dev->mem->nregions;
 462        region_i++) {
 463        if (msg_i < fd_num &&
 464            msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
 465            dev->mem->regions[region_i].guest_phys_addr) {
 466            u->postcopy_client_bases[region_i] =
 467                msg_reply.payload.memory.regions[msg_i].userspace_addr;
 468            trace_vhost_user_set_mem_table_postcopy(
 469                msg_reply.payload.memory.regions[msg_i].userspace_addr,
 470                msg.payload.memory.regions[msg_i].userspace_addr,
 471                msg_i, region_i);
 472            msg_i++;
 473        }
 474    }
 475    if (msg_i != fd_num) {
 476        error_report("%s: postcopy reply not fully consumed "
 477                     "%d vs %zd",
 478                     __func__, msg_i, fd_num);
 479        return -1;
 480    }
 481    /* Now we've registered this with the postcopy code, we ack to the client,
 482     * because now we're in the position to be able to deal with any faults
 483     * it generates.
 484     */
 485    /* TODO: Use this for failure cases as well with a bad value */
 486    msg.hdr.size = sizeof(msg.payload.u64);
 487    msg.payload.u64 = 0; /* OK */
 488    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 489        return -1;
 490    }
 491
 492    if (reply_supported) {
 493        return process_message_reply(dev, &msg);
 494    }
 495
 496    return 0;
 497}
 498
 499static int vhost_user_set_mem_table(struct vhost_dev *dev,
 500                                    struct vhost_memory *mem)
 501{
 502    struct vhost_user *u = dev->opaque;
 503    int fds[VHOST_MEMORY_MAX_NREGIONS];
 504    int i, fd;
 505    size_t fd_num = 0;
 506    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
 507    bool reply_supported = virtio_has_feature(dev->protocol_features,
 508                                          VHOST_USER_PROTOCOL_F_REPLY_ACK) &&
 509                                          !do_postcopy;
 510
 511    if (do_postcopy) {
 512        /* Postcopy has enough differences that it's best done in it's own
 513         * version
 514         */
 515        return vhost_user_set_mem_table_postcopy(dev, mem);
 516    }
 517
 518    VhostUserMsg msg = {
 519        .hdr.request = VHOST_USER_SET_MEM_TABLE,
 520        .hdr.flags = VHOST_USER_VERSION,
 521    };
 522
 523    if (reply_supported) {
 524        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
 525    }
 526
 527    for (i = 0; i < dev->mem->nregions; ++i) {
 528        struct vhost_memory_region *reg = dev->mem->regions + i;
 529        ram_addr_t offset;
 530        MemoryRegion *mr;
 531
 532        assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
 533        mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
 534                                     &offset);
 535        fd = memory_region_get_fd(mr);
 536        if (fd > 0) {
 537            if (fd_num == VHOST_MEMORY_MAX_NREGIONS) {
 538                error_report("Failed preparing vhost-user memory table msg");
 539                return -1;
 540            }
 541            msg.payload.memory.regions[fd_num].userspace_addr =
 542                reg->userspace_addr;
 543            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
 544            msg.payload.memory.regions[fd_num].guest_phys_addr =
 545                reg->guest_phys_addr;
 546            msg.payload.memory.regions[fd_num].mmap_offset = offset;
 547            fds[fd_num++] = fd;
 548        }
 549    }
 550
 551    msg.payload.memory.nregions = fd_num;
 552
 553    if (!fd_num) {
 554        error_report("Failed initializing vhost-user memory map, "
 555                     "consider using -object memory-backend-file share=on");
 556        return -1;
 557    }
 558
 559    msg.hdr.size = sizeof(msg.payload.memory.nregions);
 560    msg.hdr.size += sizeof(msg.payload.memory.padding);
 561    msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
 562
 563    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
 564        return -1;
 565    }
 566
 567    if (reply_supported) {
 568        return process_message_reply(dev, &msg);
 569    }
 570
 571    return 0;
 572}
 573
 574static int vhost_user_set_vring_addr(struct vhost_dev *dev,
 575                                     struct vhost_vring_addr *addr)
 576{
 577    VhostUserMsg msg = {
 578        .hdr.request = VHOST_USER_SET_VRING_ADDR,
 579        .hdr.flags = VHOST_USER_VERSION,
 580        .payload.addr = *addr,
 581        .hdr.size = sizeof(msg.payload.addr),
 582    };
 583
 584    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 585        return -1;
 586    }
 587
 588    return 0;
 589}
 590
 591static int vhost_user_set_vring_endian(struct vhost_dev *dev,
 592                                       struct vhost_vring_state *ring)
 593{
 594    bool cross_endian = virtio_has_feature(dev->protocol_features,
 595                                           VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
 596    VhostUserMsg msg = {
 597        .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
 598        .hdr.flags = VHOST_USER_VERSION,
 599        .payload.state = *ring,
 600        .hdr.size = sizeof(msg.payload.state),
 601    };
 602
 603    if (!cross_endian) {
 604        error_report("vhost-user trying to send unhandled ioctl");
 605        return -1;
 606    }
 607
 608    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 609        return -1;
 610    }
 611
 612    return 0;
 613}
 614
 615static int vhost_set_vring(struct vhost_dev *dev,
 616                           unsigned long int request,
 617                           struct vhost_vring_state *ring)
 618{
 619    VhostUserMsg msg = {
 620        .hdr.request = request,
 621        .hdr.flags = VHOST_USER_VERSION,
 622        .payload.state = *ring,
 623        .hdr.size = sizeof(msg.payload.state),
 624    };
 625
 626    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 627        return -1;
 628    }
 629
 630    return 0;
 631}
 632
 633static int vhost_user_set_vring_num(struct vhost_dev *dev,
 634                                    struct vhost_vring_state *ring)
 635{
 636    return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
 637}
 638
 639static int vhost_user_set_vring_base(struct vhost_dev *dev,
 640                                     struct vhost_vring_state *ring)
 641{
 642    return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
 643}
 644
 645static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
 646{
 647    int i;
 648
 649    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
 650        return -1;
 651    }
 652
 653    for (i = 0; i < dev->nvqs; ++i) {
 654        struct vhost_vring_state state = {
 655            .index = dev->vq_index + i,
 656            .num   = enable,
 657        };
 658
 659        vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
 660    }
 661
 662    return 0;
 663}
 664
 665static int vhost_user_get_vring_base(struct vhost_dev *dev,
 666                                     struct vhost_vring_state *ring)
 667{
 668    VhostUserMsg msg = {
 669        .hdr.request = VHOST_USER_GET_VRING_BASE,
 670        .hdr.flags = VHOST_USER_VERSION,
 671        .payload.state = *ring,
 672        .hdr.size = sizeof(msg.payload.state),
 673    };
 674
 675    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 676        return -1;
 677    }
 678
 679    if (vhost_user_read(dev, &msg) < 0) {
 680        return -1;
 681    }
 682
 683    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
 684        error_report("Received unexpected msg type. Expected %d received %d",
 685                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
 686        return -1;
 687    }
 688
 689    if (msg.hdr.size != sizeof(msg.payload.state)) {
 690        error_report("Received bad msg size.");
 691        return -1;
 692    }
 693
 694    *ring = msg.payload.state;
 695
 696    return 0;
 697}
 698
 699static int vhost_set_vring_file(struct vhost_dev *dev,
 700                                VhostUserRequest request,
 701                                struct vhost_vring_file *file)
 702{
 703    int fds[VHOST_MEMORY_MAX_NREGIONS];
 704    size_t fd_num = 0;
 705    VhostUserMsg msg = {
 706        .hdr.request = request,
 707        .hdr.flags = VHOST_USER_VERSION,
 708        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
 709        .hdr.size = sizeof(msg.payload.u64),
 710    };
 711
 712    if (ioeventfd_enabled() && file->fd > 0) {
 713        fds[fd_num++] = file->fd;
 714    } else {
 715        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
 716    }
 717
 718    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
 719        return -1;
 720    }
 721
 722    return 0;
 723}
 724
 725static int vhost_user_set_vring_kick(struct vhost_dev *dev,
 726                                     struct vhost_vring_file *file)
 727{
 728    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
 729}
 730
 731static int vhost_user_set_vring_call(struct vhost_dev *dev,
 732                                     struct vhost_vring_file *file)
 733{
 734    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
 735}
 736
 737static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
 738{
 739    VhostUserMsg msg = {
 740        .hdr.request = request,
 741        .hdr.flags = VHOST_USER_VERSION,
 742        .payload.u64 = u64,
 743        .hdr.size = sizeof(msg.payload.u64),
 744    };
 745
 746    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 747        return -1;
 748    }
 749
 750    return 0;
 751}
 752
 753static int vhost_user_set_features(struct vhost_dev *dev,
 754                                   uint64_t features)
 755{
 756    return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
 757}
 758
 759static int vhost_user_set_protocol_features(struct vhost_dev *dev,
 760                                            uint64_t features)
 761{
 762    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
 763}
 764
 765static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
 766{
 767    VhostUserMsg msg = {
 768        .hdr.request = request,
 769        .hdr.flags = VHOST_USER_VERSION,
 770    };
 771
 772    if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
 773        return 0;
 774    }
 775
 776    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 777        return -1;
 778    }
 779
 780    if (vhost_user_read(dev, &msg) < 0) {
 781        return -1;
 782    }
 783
 784    if (msg.hdr.request != request) {
 785        error_report("Received unexpected msg type. Expected %d received %d",
 786                     request, msg.hdr.request);
 787        return -1;
 788    }
 789
 790    if (msg.hdr.size != sizeof(msg.payload.u64)) {
 791        error_report("Received bad msg size.");
 792        return -1;
 793    }
 794
 795    *u64 = msg.payload.u64;
 796
 797    return 0;
 798}
 799
 800static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
 801{
 802    return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
 803}
 804
 805static int vhost_user_set_owner(struct vhost_dev *dev)
 806{
 807    VhostUserMsg msg = {
 808        .hdr.request = VHOST_USER_SET_OWNER,
 809        .hdr.flags = VHOST_USER_VERSION,
 810    };
 811
 812    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 813        return -1;
 814    }
 815
 816    return 0;
 817}
 818
 819static int vhost_user_reset_device(struct vhost_dev *dev)
 820{
 821    VhostUserMsg msg = {
 822        .hdr.request = VHOST_USER_RESET_OWNER,
 823        .hdr.flags = VHOST_USER_VERSION,
 824    };
 825
 826    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 827        return -1;
 828    }
 829
 830    return 0;
 831}
 832
 833static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
 834{
 835    int ret = -1;
 836
 837    if (!dev->config_ops) {
 838        return -1;
 839    }
 840
 841    if (dev->config_ops->vhost_dev_config_notifier) {
 842        ret = dev->config_ops->vhost_dev_config_notifier(dev);
 843    }
 844
 845    return ret;
 846}
 847
 848static void slave_read(void *opaque)
 849{
 850    struct vhost_dev *dev = opaque;
 851    struct vhost_user *u = dev->opaque;
 852    VhostUserHeader hdr = { 0, };
 853    VhostUserPayload payload = { 0, };
 854    int size, ret = 0;
 855
 856    /* Read header */
 857    size = read(u->slave_fd, &hdr, VHOST_USER_HDR_SIZE);
 858    if (size != VHOST_USER_HDR_SIZE) {
 859        error_report("Failed to read from slave.");
 860        goto err;
 861    }
 862
 863    if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
 864        error_report("Failed to read msg header."
 865                " Size %d exceeds the maximum %zu.", hdr.size,
 866                VHOST_USER_PAYLOAD_SIZE);
 867        goto err;
 868    }
 869
 870    /* Read payload */
 871    size = read(u->slave_fd, &payload, hdr.size);
 872    if (size != hdr.size) {
 873        error_report("Failed to read payload from slave.");
 874        goto err;
 875    }
 876
 877    switch (hdr.request) {
 878    case VHOST_USER_SLAVE_IOTLB_MSG:
 879        ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
 880        break;
 881    case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG :
 882        ret = vhost_user_slave_handle_config_change(dev);
 883        break;
 884    default:
 885        error_report("Received unexpected msg type.");
 886        ret = -EINVAL;
 887    }
 888
 889    /*
 890     * REPLY_ACK feature handling. Other reply types has to be managed
 891     * directly in their request handlers.
 892     */
 893    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
 894        struct iovec iovec[2];
 895
 896
 897        hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
 898        hdr.flags |= VHOST_USER_REPLY_MASK;
 899
 900        payload.u64 = !!ret;
 901        hdr.size = sizeof(payload.u64);
 902
 903        iovec[0].iov_base = &hdr;
 904        iovec[0].iov_len = VHOST_USER_HDR_SIZE;
 905        iovec[1].iov_base = &payload;
 906        iovec[1].iov_len = hdr.size;
 907
 908        size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
 909        if (size != VHOST_USER_HDR_SIZE + hdr.size) {
 910            error_report("Failed to send msg reply to slave.");
 911            goto err;
 912        }
 913    }
 914
 915    return;
 916
 917err:
 918    qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
 919    close(u->slave_fd);
 920    u->slave_fd = -1;
 921    return;
 922}
 923
 924static int vhost_setup_slave_channel(struct vhost_dev *dev)
 925{
 926    VhostUserMsg msg = {
 927        .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
 928        .hdr.flags = VHOST_USER_VERSION,
 929    };
 930    struct vhost_user *u = dev->opaque;
 931    int sv[2], ret = 0;
 932    bool reply_supported = virtio_has_feature(dev->protocol_features,
 933                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
 934
 935    if (!virtio_has_feature(dev->protocol_features,
 936                            VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
 937        return 0;
 938    }
 939
 940    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
 941        error_report("socketpair() failed");
 942        return -1;
 943    }
 944
 945    u->slave_fd = sv[0];
 946    qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev);
 947
 948    if (reply_supported) {
 949        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
 950    }
 951
 952    ret = vhost_user_write(dev, &msg, &sv[1], 1);
 953    if (ret) {
 954        goto out;
 955    }
 956
 957    if (reply_supported) {
 958        ret = process_message_reply(dev, &msg);
 959    }
 960
 961out:
 962    close(sv[1]);
 963    if (ret) {
 964        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
 965        close(u->slave_fd);
 966        u->slave_fd = -1;
 967    }
 968
 969    return ret;
 970}
 971
 972/*
 973 * Called back from the postcopy fault thread when a fault is received on our
 974 * ufd.
 975 * TODO: This is Linux specific
 976 */
 977static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
 978                                             void *ufd)
 979{
 980    struct vhost_dev *dev = pcfd->data;
 981    struct vhost_user *u = dev->opaque;
 982    struct uffd_msg *msg = ufd;
 983    uint64_t faultaddr = msg->arg.pagefault.address;
 984    RAMBlock *rb = NULL;
 985    uint64_t rb_offset;
 986    int i;
 987
 988    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
 989                                            dev->mem->nregions);
 990    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
 991        trace_vhost_user_postcopy_fault_handler_loop(i,
 992                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
 993        if (faultaddr >= u->postcopy_client_bases[i]) {
 994            /* Ofset of the fault address in the vhost region */
 995            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
 996            if (region_offset < dev->mem->regions[i].memory_size) {
 997                rb_offset = region_offset + u->region_rb_offset[i];
 998                trace_vhost_user_postcopy_fault_handler_found(i,
 999                        region_offset, rb_offset);
1000                rb = u->region_rb[i];
1001                return postcopy_request_shared_page(pcfd, rb, faultaddr,
1002                                                    rb_offset);
1003            }
1004        }
1005    }
1006    error_report("%s: Failed to find region for fault %" PRIx64,
1007                 __func__, faultaddr);
1008    return -1;
1009}
1010
1011static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1012                                     uint64_t offset)
1013{
1014    struct vhost_dev *dev = pcfd->data;
1015    struct vhost_user *u = dev->opaque;
1016    int i;
1017
1018    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1019
1020    if (!u) {
1021        return 0;
1022    }
1023    /* Translate the offset into an address in the clients address space */
1024    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1025        if (u->region_rb[i] == rb &&
1026            offset >= u->region_rb_offset[i] &&
1027            offset < (u->region_rb_offset[i] +
1028                      dev->mem->regions[i].memory_size)) {
1029            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1030                                   u->postcopy_client_bases[i];
1031            trace_vhost_user_postcopy_waker_found(client_addr);
1032            return postcopy_wake_shared(pcfd, client_addr, rb);
1033        }
1034    }
1035
1036    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1037    return 0;
1038}
1039
1040/*
1041 * Called at the start of an inbound postcopy on reception of the
1042 * 'advise' command.
1043 */
1044static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1045{
1046    struct vhost_user *u = dev->opaque;
1047    CharBackend *chr = u->chr;
1048    int ufd;
1049    VhostUserMsg msg = {
1050        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1051        .hdr.flags = VHOST_USER_VERSION,
1052    };
1053
1054    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1055        error_setg(errp, "Failed to send postcopy_advise to vhost");
1056        return -1;
1057    }
1058
1059    if (vhost_user_read(dev, &msg) < 0) {
1060        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1061        return -1;
1062    }
1063
1064    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1065        error_setg(errp, "Unexpected msg type. Expected %d received %d",
1066                     VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1067        return -1;
1068    }
1069
1070    if (msg.hdr.size) {
1071        error_setg(errp, "Received bad msg size.");
1072        return -1;
1073    }
1074    ufd = qemu_chr_fe_get_msgfd(chr);
1075    if (ufd < 0) {
1076        error_setg(errp, "%s: Failed to get ufd", __func__);
1077        return -1;
1078    }
1079    fcntl(ufd, F_SETFL, O_NONBLOCK);
1080
1081    /* register ufd with userfault thread */
1082    u->postcopy_fd.fd = ufd;
1083    u->postcopy_fd.data = dev;
1084    u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1085    u->postcopy_fd.waker = vhost_user_postcopy_waker;
1086    u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
1087    postcopy_register_shared_ufd(&u->postcopy_fd);
1088    return 0;
1089}
1090
1091/*
1092 * Called at the switch to postcopy on reception of the 'listen' command.
1093 */
1094static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1095{
1096    struct vhost_user *u = dev->opaque;
1097    int ret;
1098    VhostUserMsg msg = {
1099        .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1100        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1101    };
1102    u->postcopy_listen = true;
1103    trace_vhost_user_postcopy_listen();
1104    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1105        error_setg(errp, "Failed to send postcopy_listen to vhost");
1106        return -1;
1107    }
1108
1109    ret = process_message_reply(dev, &msg);
1110    if (ret) {
1111        error_setg(errp, "Failed to receive reply to postcopy_listen");
1112        return ret;
1113    }
1114
1115    return 0;
1116}
1117
1118/*
1119 * Called at the end of postcopy
1120 */
1121static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1122{
1123    VhostUserMsg msg = {
1124        .hdr.request = VHOST_USER_POSTCOPY_END,
1125        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1126    };
1127    int ret;
1128    struct vhost_user *u = dev->opaque;
1129
1130    trace_vhost_user_postcopy_end_entry();
1131    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1132        error_setg(errp, "Failed to send postcopy_end to vhost");
1133        return -1;
1134    }
1135
1136    ret = process_message_reply(dev, &msg);
1137    if (ret) {
1138        error_setg(errp, "Failed to receive reply to postcopy_end");
1139        return ret;
1140    }
1141    postcopy_unregister_shared_ufd(&u->postcopy_fd);
1142    u->postcopy_fd.handler = NULL;
1143
1144    trace_vhost_user_postcopy_end_exit();
1145
1146    return 0;
1147}
1148
1149static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1150                                        void *opaque)
1151{
1152    struct PostcopyNotifyData *pnd = opaque;
1153    struct vhost_user *u = container_of(notifier, struct vhost_user,
1154                                         postcopy_notifier);
1155    struct vhost_dev *dev = u->dev;
1156
1157    switch (pnd->reason) {
1158    case POSTCOPY_NOTIFY_PROBE:
1159        if (!virtio_has_feature(dev->protocol_features,
1160                                VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1161            /* TODO: Get the device name into this error somehow */
1162            error_setg(pnd->errp,
1163                       "vhost-user backend not capable of postcopy");
1164            return -ENOENT;
1165        }
1166        break;
1167
1168    case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1169        return vhost_user_postcopy_advise(dev, pnd->errp);
1170
1171    case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1172        return vhost_user_postcopy_listen(dev, pnd->errp);
1173
1174    case POSTCOPY_NOTIFY_INBOUND_END:
1175        return vhost_user_postcopy_end(dev, pnd->errp);
1176
1177    default:
1178        /* We ignore notifications we don't know */
1179        break;
1180    }
1181
1182    return 0;
1183}
1184
1185static int vhost_user_init(struct vhost_dev *dev, void *opaque)
1186{
1187    uint64_t features, protocol_features;
1188    struct vhost_user *u;
1189    int err;
1190
1191    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1192
1193    u = g_new0(struct vhost_user, 1);
1194    u->chr = opaque;
1195    u->slave_fd = -1;
1196    u->dev = dev;
1197    dev->opaque = u;
1198
1199    err = vhost_user_get_features(dev, &features);
1200    if (err < 0) {
1201        return err;
1202    }
1203
1204    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1205        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
1206
1207        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
1208                                 &protocol_features);
1209        if (err < 0) {
1210            return err;
1211        }
1212
1213        dev->protocol_features =
1214            protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
1215
1216        if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1217            /* Don't acknowledge CONFIG feature if device doesn't support it */
1218            dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
1219        } else if (!(protocol_features &
1220                    (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
1221            error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG "
1222                    "but backend does not support it.");
1223            return -1;
1224        }
1225
1226        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
1227        if (err < 0) {
1228            return err;
1229        }
1230
1231        /* query the max queues we support if backend supports Multiple Queue */
1232        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
1233            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
1234                                     &dev->max_queues);
1235            if (err < 0) {
1236                return err;
1237            }
1238        }
1239
1240        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
1241                !(virtio_has_feature(dev->protocol_features,
1242                    VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
1243                 virtio_has_feature(dev->protocol_features,
1244                    VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
1245            error_report("IOMMU support requires reply-ack and "
1246                         "slave-req protocol features.");
1247            return -1;
1248        }
1249    }
1250
1251    if (dev->migration_blocker == NULL &&
1252        !virtio_has_feature(dev->protocol_features,
1253                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
1254        error_setg(&dev->migration_blocker,
1255                   "Migration disabled: vhost-user backend lacks "
1256                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
1257    }
1258
1259    err = vhost_setup_slave_channel(dev);
1260    if (err < 0) {
1261        return err;
1262    }
1263
1264    u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
1265    postcopy_add_notifier(&u->postcopy_notifier);
1266
1267    return 0;
1268}
1269
1270static int vhost_user_cleanup(struct vhost_dev *dev)
1271{
1272    struct vhost_user *u;
1273
1274    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1275
1276    u = dev->opaque;
1277    if (u->postcopy_notifier.notify) {
1278        postcopy_remove_notifier(&u->postcopy_notifier);
1279        u->postcopy_notifier.notify = NULL;
1280    }
1281    if (u->slave_fd >= 0) {
1282        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1283        close(u->slave_fd);
1284        u->slave_fd = -1;
1285    }
1286    g_free(u->region_rb);
1287    u->region_rb = NULL;
1288    g_free(u->region_rb_offset);
1289    u->region_rb_offset = NULL;
1290    u->region_rb_len = 0;
1291    g_free(u);
1292    dev->opaque = 0;
1293
1294    return 0;
1295}
1296
1297static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
1298{
1299    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
1300
1301    return idx;
1302}
1303
1304static int vhost_user_memslots_limit(struct vhost_dev *dev)
1305{
1306    return VHOST_MEMORY_MAX_NREGIONS;
1307}
1308
1309static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
1310{
1311    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1312
1313    return virtio_has_feature(dev->protocol_features,
1314                              VHOST_USER_PROTOCOL_F_LOG_SHMFD);
1315}
1316
1317static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
1318{
1319    VhostUserMsg msg = { 0 };
1320
1321    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1322
1323    /* If guest supports GUEST_ANNOUNCE do nothing */
1324    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
1325        return 0;
1326    }
1327
1328    /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
1329    if (virtio_has_feature(dev->protocol_features,
1330                           VHOST_USER_PROTOCOL_F_RARP)) {
1331        msg.hdr.request = VHOST_USER_SEND_RARP;
1332        msg.hdr.flags = VHOST_USER_VERSION;
1333        memcpy((char *)&msg.payload.u64, mac_addr, 6);
1334        msg.hdr.size = sizeof(msg.payload.u64);
1335
1336        return vhost_user_write(dev, &msg, NULL, 0);
1337    }
1338    return -1;
1339}
1340
1341static bool vhost_user_can_merge(struct vhost_dev *dev,
1342                                 uint64_t start1, uint64_t size1,
1343                                 uint64_t start2, uint64_t size2)
1344{
1345    ram_addr_t offset;
1346    int mfd, rfd;
1347    MemoryRegion *mr;
1348
1349    mr = memory_region_from_host((void *)(uintptr_t)start1, &offset);
1350    mfd = memory_region_get_fd(mr);
1351
1352    mr = memory_region_from_host((void *)(uintptr_t)start2, &offset);
1353    rfd = memory_region_get_fd(mr);
1354
1355    return mfd == rfd;
1356}
1357
1358static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
1359{
1360    VhostUserMsg msg;
1361    bool reply_supported = virtio_has_feature(dev->protocol_features,
1362                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
1363
1364    if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
1365        return 0;
1366    }
1367
1368    msg.hdr.request = VHOST_USER_NET_SET_MTU;
1369    msg.payload.u64 = mtu;
1370    msg.hdr.size = sizeof(msg.payload.u64);
1371    msg.hdr.flags = VHOST_USER_VERSION;
1372    if (reply_supported) {
1373        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1374    }
1375
1376    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1377        return -1;
1378    }
1379
1380    /* If reply_ack supported, slave has to ack specified MTU is valid */
1381    if (reply_supported) {
1382        return process_message_reply(dev, &msg);
1383    }
1384
1385    return 0;
1386}
1387
1388static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
1389                                            struct vhost_iotlb_msg *imsg)
1390{
1391    VhostUserMsg msg = {
1392        .hdr.request = VHOST_USER_IOTLB_MSG,
1393        .hdr.size = sizeof(msg.payload.iotlb),
1394        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1395        .payload.iotlb = *imsg,
1396    };
1397
1398    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1399        return -EFAULT;
1400    }
1401
1402    return process_message_reply(dev, &msg);
1403}
1404
1405
/*
 * IOTLB callback enable/disable hook.  Intentionally empty for
 * vhost-user; both @dev and @enabled are ignored.
 */
static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /* No-op as the receive channel is not dedicated to IOTLB messages. */
}
1410
1411static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
1412                                 uint32_t config_len)
1413{
1414    VhostUserMsg msg = {
1415        .hdr.request = VHOST_USER_GET_CONFIG,
1416        .hdr.flags = VHOST_USER_VERSION,
1417        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
1418    };
1419
1420    if (!virtio_has_feature(dev->protocol_features,
1421                VHOST_USER_PROTOCOL_F_CONFIG)) {
1422        return -1;
1423    }
1424
1425    if (config_len > VHOST_USER_MAX_CONFIG_SIZE) {
1426        return -1;
1427    }
1428
1429    msg.payload.config.offset = 0;
1430    msg.payload.config.size = config_len;
1431    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1432        return -1;
1433    }
1434
1435    if (vhost_user_read(dev, &msg) < 0) {
1436        return -1;
1437    }
1438
1439    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
1440        error_report("Received unexpected msg type. Expected %d received %d",
1441                     VHOST_USER_GET_CONFIG, msg.hdr.request);
1442        return -1;
1443    }
1444
1445    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
1446        error_report("Received bad msg size.");
1447        return -1;
1448    }
1449
1450    memcpy(config, msg.payload.config.region, config_len);
1451
1452    return 0;
1453}
1454
1455static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
1456                                 uint32_t offset, uint32_t size, uint32_t flags)
1457{
1458    uint8_t *p;
1459    bool reply_supported = virtio_has_feature(dev->protocol_features,
1460                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
1461
1462    VhostUserMsg msg = {
1463        .hdr.request = VHOST_USER_SET_CONFIG,
1464        .hdr.flags = VHOST_USER_VERSION,
1465        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
1466    };
1467
1468    if (!virtio_has_feature(dev->protocol_features,
1469                VHOST_USER_PROTOCOL_F_CONFIG)) {
1470        return -1;
1471    }
1472
1473    if (reply_supported) {
1474        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1475    }
1476
1477    if (size > VHOST_USER_MAX_CONFIG_SIZE) {
1478        return -1;
1479    }
1480
1481    msg.payload.config.offset = offset,
1482    msg.payload.config.size = size,
1483    msg.payload.config.flags = flags,
1484    p = msg.payload.config.region;
1485    memcpy(p, data, size);
1486
1487    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1488        return -1;
1489    }
1490
1491    if (reply_supported) {
1492        return process_message_reply(dev, &msg);
1493    }
1494
1495    return 0;
1496}
1497
1498static int vhost_user_crypto_create_session(struct vhost_dev *dev,
1499                                            void *session_info,
1500                                            uint64_t *session_id)
1501{
1502    bool crypto_session = virtio_has_feature(dev->protocol_features,
1503                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
1504    CryptoDevBackendSymSessionInfo *sess_info = session_info;
1505    VhostUserMsg msg = {
1506        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
1507        .hdr.flags = VHOST_USER_VERSION,
1508        .hdr.size = sizeof(msg.payload.session),
1509    };
1510
1511    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1512
1513    if (!crypto_session) {
1514        error_report("vhost-user trying to send unhandled ioctl");
1515        return -1;
1516    }
1517
1518    memcpy(&msg.payload.session.session_setup_data, sess_info,
1519              sizeof(CryptoDevBackendSymSessionInfo));
1520    if (sess_info->key_len) {
1521        memcpy(&msg.payload.session.key, sess_info->cipher_key,
1522               sess_info->key_len);
1523    }
1524    if (sess_info->auth_key_len > 0) {
1525        memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
1526               sess_info->auth_key_len);
1527    }
1528    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1529        error_report("vhost_user_write() return -1, create session failed");
1530        return -1;
1531    }
1532
1533    if (vhost_user_read(dev, &msg) < 0) {
1534        error_report("vhost_user_read() return -1, create session failed");
1535        return -1;
1536    }
1537
1538    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
1539        error_report("Received unexpected msg type. Expected %d received %d",
1540                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
1541        return -1;
1542    }
1543
1544    if (msg.hdr.size != sizeof(msg.payload.session)) {
1545        error_report("Received bad msg size.");
1546        return -1;
1547    }
1548
1549    if (msg.payload.session.session_id < 0) {
1550        error_report("Bad session id: %" PRId64 "",
1551                              msg.payload.session.session_id);
1552        return -1;
1553    }
1554    *session_id = msg.payload.session.session_id;
1555
1556    return 0;
1557}
1558
1559static int
1560vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
1561{
1562    bool crypto_session = virtio_has_feature(dev->protocol_features,
1563                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
1564    VhostUserMsg msg = {
1565        .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
1566        .hdr.flags = VHOST_USER_VERSION,
1567        .hdr.size = sizeof(msg.payload.u64),
1568    };
1569    msg.payload.u64 = session_id;
1570
1571    if (!crypto_session) {
1572        error_report("vhost-user trying to send unhandled ioctl");
1573        return -1;
1574    }
1575
1576    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1577        error_report("vhost_user_write() return -1, close session failed");
1578        return -1;
1579    }
1580
1581    return 0;
1582}
1583
/*
 * Operation table for the vhost-user backend
 * (backend_type VHOST_BACKEND_TYPE_USER).  Every hook points at the
 * corresponding vhost_user_*() implementation in this file.
 */
const VhostOps user_ops = {
        .backend_type = VHOST_BACKEND_TYPE_USER,
        /* backend lifecycle and limits */
        .vhost_backend_init = vhost_user_init,
        .vhost_backend_cleanup = vhost_user_cleanup,
        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
        .vhost_set_log_base = vhost_user_set_log_base,
        .vhost_set_mem_table = vhost_user_set_mem_table,
        /* vring setup */
        .vhost_set_vring_addr = vhost_user_set_vring_addr,
        .vhost_set_vring_endian = vhost_user_set_vring_endian,
        .vhost_set_vring_num = vhost_user_set_vring_num,
        .vhost_set_vring_base = vhost_user_set_vring_base,
        .vhost_get_vring_base = vhost_user_get_vring_base,
        .vhost_set_vring_kick = vhost_user_set_vring_kick,
        .vhost_set_vring_call = vhost_user_set_vring_call,
        .vhost_set_features = vhost_user_set_features,
        .vhost_get_features = vhost_user_get_features,
        .vhost_set_owner = vhost_user_set_owner,
        .vhost_reset_device = vhost_user_reset_device,
        .vhost_get_vq_index = vhost_user_get_vq_index,
        .vhost_set_vring_enable = vhost_user_set_vring_enable,
        .vhost_requires_shm_log = vhost_user_requires_shm_log,
        .vhost_migration_done = vhost_user_migration_done,
        .vhost_backend_can_merge = vhost_user_can_merge,
        .vhost_net_set_mtu = vhost_user_net_set_mtu,
        /* IOTLB */
        .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
        .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
        /* device config space */
        .vhost_get_config = vhost_user_get_config,
        .vhost_set_config = vhost_user_set_config,
        /* crypto sessions */
        .vhost_crypto_create_session = vhost_user_crypto_create_session,
        .vhost_crypto_close_session = vhost_user_crypto_close_session,
};
1615