dpdk/lib/vhost/socket.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/queue.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>

#include <rte_log.h>

#include "fd_man.h"
#include "vhost.h"
#include "vhost_user.h"


TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
	struct vhost_user_connection_list conn_list;
	pthread_mutex_t conn_mutex;
	char *path;
	int socket_fd;
	struct sockaddr_un un;
	bool is_server;
	bool reconnect;
	bool iommu_support;
	bool use_builtin_virtio_net;
	bool extbuf;
	bool linearbuf;
	bool async_copy;
	bool net_compliant_ol_flags;

	/*
	 * "supported_features" indicates the feature bits the vhost
	 * driver supports. "features" indicates the feature bits after
	 * rte_vhost_driver_features_disable()/enable() have been
	 * applied; it is also the final feature set used for
	 * vhost-user feature negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;

	struct rte_vdpa_device *vdpa_dev;

	struct vhost_device_ops const *notify_ops;
};

struct vhost_user_connection {
	struct vhost_user_socket *vsocket;
	int connfd;
	int vid;

	TAILQ_ENTRY(vhost_user_connection) next;
};

#define MAX_VHOST_SOCKET 1024
struct vhost_user {
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	struct fdset fdset;
	int vsocket_cnt;
	pthread_mutex_t mutex;
};

#define MAX_VIRTIO_BACKLOG 128

static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);

static struct vhost_user vhost_user = {
	.fdset = {
		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
		.fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
		.num = 0
	},
	.vsocket_cnt = 0,
	.mutex = PTHREAD_MUTEX_INITIALIZER,
};

/*
 * Return the number of bytes read on success, or a negative value on
 * failure. Updates *fd_num with the number of fds received.
 */
int
read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds,
		int *fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	char control[CMSG_SPACE(max_fds * sizeof(int))];
	struct cmsghdr *cmsg;
	int got_fds = 0;
	int ret;

	*fd_num = 0;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len  = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = control;
	msgh.msg_controllen = sizeof(control);

	ret = recvmsg(sockfd, &msgh, 0);
	if (ret <= 0) {
		if (ret)
			VHOST_LOG_CONFIG(ERR, "recvmsg failed\n");
		return ret;
	}

	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
		VHOST_LOG_CONFIG(ERR, "truncated msg\n");
		return -1;
	}

	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
		if ((cmsg->cmsg_level == SOL_SOCKET) &&
			(cmsg->cmsg_type == SCM_RIGHTS)) {
			got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
			*fd_num = got_fds;
			memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
			break;
		}
	}

	/* Clear out unused file descriptors */
	while (got_fds < max_fds)
		fds[got_fds++] = -1;

	return ret;
}
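
/*
 * Example (an illustrative sketch, not part of this file): receiving a
 * vhost-user message header together with any SCM_RIGHTS descriptors.
 * The constants are the ones the vhost-user message handler is assumed
 * to use; treat them as placeholders.
 *
 *	int fds[VHOST_MEMORY_MAX_NREGIONS];
 *	int fd_num;
 *	char hdr[VHOST_USER_HDR_SIZE];
 *
 *	if (read_fd_message(sockfd, hdr, sizeof(hdr),
 *			fds, VHOST_MEMORY_MAX_NREGIONS, &fd_num) <= 0)
 *		return -1;	(peer closed, or recvmsg() failed)
 */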

int
send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct cmsghdr *cmsg;
	int ret;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;

	if (fds && fd_num > 0) {
		msgh.msg_control = control;
		msgh.msg_controllen = sizeof(control);
		cmsg = CMSG_FIRSTHDR(&msgh);
		if (cmsg == NULL) {
			VHOST_LOG_CONFIG(ERR, "cmsg == NULL\n");
			errno = EINVAL;
			return -1;
		}
		cmsg->cmsg_len = CMSG_LEN(fdsize);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		memcpy(CMSG_DATA(cmsg), fds, fdsize);
	} else {
		msgh.msg_control = NULL;
		msgh.msg_controllen = 0;
	}

	do {
		ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0)
		VHOST_LOG_CONFIG(ERR, "sendmsg error\n");

	return ret;
}
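
/*
 * Example (an illustrative sketch): passing one file descriptor along
 * with a message, as the vhost-user protocol requires for replies that
 * carry an fd. "msg", "msg_size" and "fd" are placeholders.
 *
 *	if (send_fd_message(sockfd, (char *)&msg, msg_size, &fd, 1) < 0)
 *		(handle the error)
 */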

static void
vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
{
	int vid;
	size_t size;
	struct vhost_user_connection *conn;
	int ret;
	struct virtio_net *dev;

	if (vsocket == NULL)
		return;

	conn = malloc(sizeof(*conn));
	if (conn == NULL) {
		close(fd);
		return;
	}

	vid = vhost_new_device();
	if (vid == -1)
		goto err;

	size = strnlen(vsocket->path, PATH_MAX);
	vhost_set_ifname(vid, vsocket->path, size);

	vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net,
		vsocket->net_compliant_ol_flags);

	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev);

	if (vsocket->extbuf)
		vhost_enable_extbuf(vid);

	if (vsocket->linearbuf)
		vhost_enable_linearbuf(vid);

	if (vsocket->async_copy) {
		dev = get_device(vid);

		if (dev)
			dev->async_copy = 1;
	}

	VHOST_LOG_CONFIG(INFO, "new device, handle is %d\n", vid);

	if (vsocket->notify_ops->new_connection) {
		ret = vsocket->notify_ops->new_connection(vid);
		if (ret < 0) {
			VHOST_LOG_CONFIG(ERR,
				"failed to add vhost user connection with fd %d\n",
				fd);
			goto err_cleanup;
		}
	}

	conn->connfd = fd;
	conn->vsocket = vsocket;
	conn->vid = vid;
	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
			NULL, conn);
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR,
			"failed to add fd %d into vhost server fdset\n",
			fd);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		goto err_cleanup;
	}

	pthread_mutex_lock(&vsocket->conn_mutex);
	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
	pthread_mutex_unlock(&vsocket->conn_mutex);

	fdset_pipe_notify(&vhost_user.fdset);
	return;

err_cleanup:
	vhost_destroy_device(vid);
err:
	free(conn);
	close(fd);
}

/* Callback invoked when there is a new vhost-user connection from a client. */
static void
vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
{
	struct vhost_user_socket *vsocket = dat;

	fd = accept(fd, NULL, NULL);
	if (fd < 0)
		return;

	VHOST_LOG_CONFIG(INFO, "new vhost user connection is %d\n", fd);
	vhost_user_add_connection(fd, vsocket);
}

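/*
 * Handle one vhost-user message on an established connection. On error
 * the connection is torn down: the fd is closed and removed from the
 * fdset, the device is destroyed and, for a reconnecting client socket,
 * a new connection attempt is queued.
 */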
static void
vhost_user_read_cb(int connfd, void *dat, int *remove)
{
	struct vhost_user_connection *conn = dat;
	struct vhost_user_socket *vsocket = conn->vsocket;
	int ret;

	ret = vhost_user_msg_handler(conn->vid, connfd);
	if (ret < 0) {
		struct virtio_net *dev = get_device(conn->vid);

		close(connfd);
		*remove = 1;

		if (dev)
			vhost_destroy_device_notify(dev);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		vhost_destroy_device(conn->vid);

		if (vsocket->reconnect) {
			create_unix_socket(vsocket);
			vhost_user_start_client(vsocket);
		}

		pthread_mutex_lock(&vsocket->conn_mutex);
		TAILQ_REMOVE(&vsocket->conn_list, conn, next);
		pthread_mutex_unlock(&vsocket->conn_mutex);

		free(conn);
	}
}

static int
create_unix_socket(struct vhost_user_socket *vsocket)
{
	int fd;
	struct sockaddr_un *un = &vsocket->un;

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;
	VHOST_LOG_CONFIG(INFO, "vhost-user %s: socket created, fd: %d\n",
		vsocket->is_server ? "server" : "client", fd);

	if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
		VHOST_LOG_CONFIG(ERR,
			"vhost-user: can't set nonblocking mode for socket, fd: "
			"%d (%s)\n", fd, strerror(errno));
		close(fd);
		return -1;
	}

	memset(un, 0, sizeof(*un));
	un->sun_family = AF_UNIX;
	strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
	un->sun_path[sizeof(un->sun_path) - 1] = '\0';

	vsocket->socket_fd = fd;
	return 0;
}

static int
vhost_user_start_server(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;

	/*
	 * bind() may fail if a socket file with the same name already
	 * exists. But the library obviously should not delete the file
	 * provided by the user, since we cannot be sure that it is not
	 * being used by other applications. Moreover, many applications
	 * form socket names based on user input, which is prone to errors.
	 *
	 * The user must ensure that the socket does not exist before
	 * registering the vhost driver in server mode.
	 */
	ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR,
			"failed to bind to %s: %s; remove it and try again\n",
			path, strerror(errno));
		goto err;
	}
	VHOST_LOG_CONFIG(INFO, "bind to %s\n", path);

	ret = listen(fd, MAX_VIRTIO_BACKLOG);
	if (ret < 0)
		goto err;

	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
		  NULL, vsocket);
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR,
			"failed to add listen fd %d to vhost server fdset\n",
			fd);
		goto err;
	}

	return 0;

err:
	close(fd);
	return -1;
}

struct vhost_user_reconnect {
	struct sockaddr_un un;
	int fd;
	struct vhost_user_socket *vsocket;

	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;
};

static struct vhost_user_reconnect_list reconn_list;
static pthread_t reconn_tid;

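/*
 * Try to complete a connect() on what may be a nonblocking socket.
 * Returns 0 once connected (clearing O_NONBLOCK on the fd), -1 if the
 * connection is not established yet and should be retried, and -2 on
 * an unrecoverable error.
 */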
static int
vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
{
	int ret, flags;

	ret = connect(fd, un, sz);
	if (ret < 0 && errno != EISCONN)
		return -1;

	flags = fcntl(fd, F_GETFL, 0);
	if (flags < 0) {
		VHOST_LOG_CONFIG(ERR,
			"can't get flags for connfd %d\n", fd);
		return -2;
	}
	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
		VHOST_LOG_CONFIG(ERR,
				"can't disable nonblocking on fd %d\n", fd);
		return -2;
	}
	return 0;
}

static void *
vhost_user_client_reconnect(void *arg __rte_unused)
{
	int ret;
	struct vhost_user_reconnect *reconn, *next;

	while (1) {
		pthread_mutex_lock(&reconn_list.mutex);

		/*
		 * An equivalent of TAILQ_FOREACH_SAFE, which is not
		 * available on all platforms.
		 */
		for (reconn = TAILQ_FIRST(&reconn_list.head);
		     reconn != NULL; reconn = next) {
			next = TAILQ_NEXT(reconn, next);

			ret = vhost_user_connect_nonblock(reconn->fd,
						(struct sockaddr *)&reconn->un,
						sizeof(reconn->un));
			if (ret == -2) {
				close(reconn->fd);
				VHOST_LOG_CONFIG(ERR,
					"reconnection for fd %d failed\n",
					reconn->fd);
				goto remove_fd;
			}
			if (ret == -1)
				continue;

			VHOST_LOG_CONFIG(INFO,
				"%s: connected\n", reconn->vsocket->path);
			vhost_user_add_connection(reconn->fd, reconn->vsocket);
remove_fd:
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			free(reconn);
		}

		pthread_mutex_unlock(&reconn_list.mutex);
		sleep(1);
	}

	return NULL;
}

static int
vhost_user_reconnect_init(void)
{
	int ret;

	ret = pthread_mutex_init(&reconn_list.mutex, NULL);
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR, "failed to initialize mutex");
		return ret;
	}
	TAILQ_INIT(&reconn_list.head);

	ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL,
			     vhost_user_client_reconnect, NULL);
	if (ret != 0) {
		VHOST_LOG_CONFIG(ERR, "failed to create reconnect thread");
		if (pthread_mutex_destroy(&reconn_list.mutex)) {
			VHOST_LOG_CONFIG(ERR,
				"failed to destroy reconnect mutex");
		}
	}

	return ret;
}

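/*
 * Initiate a client-mode connection. If the first attempt fails with a
 * retryable error and reconnect is enabled, the socket is queued on
 * reconn_list for the reconnect thread to retry.
 */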
static int
vhost_user_start_client(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;
	struct vhost_user_reconnect *reconn;

	ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&vsocket->un,
					  sizeof(vsocket->un));
	if (ret == 0) {
		vhost_user_add_connection(fd, vsocket);
		return 0;
	}

	VHOST_LOG_CONFIG(WARNING,
		"failed to connect to %s: %s\n",
		path, strerror(errno));

	if (ret == -2 || !vsocket->reconnect) {
		close(fd);
		return -1;
	}

	VHOST_LOG_CONFIG(INFO, "%s: reconnecting...\n", path);
	reconn = malloc(sizeof(*reconn));
	if (reconn == NULL) {
		VHOST_LOG_CONFIG(ERR,
			"failed to allocate memory for reconnect\n");
		close(fd);
		return -1;
	}
	reconn->un = vsocket->un;
	reconn->fd = fd;
	reconn->vsocket = vsocket;
	pthread_mutex_lock(&reconn_list.mutex);
	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
	pthread_mutex_unlock(&reconn_list.mutex);

	return 0;
}

static struct vhost_user_socket *
find_vhost_user_socket(const char *path)
{
	int i;

	if (path == NULL)
		return NULL;

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (!strcmp(vsocket->path, path))
			return vsocket;
	}

	return NULL;
}

int
rte_vhost_driver_attach_vdpa_device(const char *path,
		struct rte_vdpa_device *dev)
{
	struct vhost_user_socket *vsocket;

	if (dev == NULL || path == NULL)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->vdpa_dev = dev;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_detach_vdpa_device(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->vdpa_dev = NULL;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

struct rte_vdpa_device *
rte_vhost_driver_get_vdpa_device(const char *path)
{
	struct vhost_user_socket *vsocket;
	struct rte_vdpa_device *dev = NULL;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		dev = vsocket->vdpa_dev;
	pthread_mutex_unlock(&vhost_user.mutex);

	return dev;
}

int
rte_vhost_driver_disable_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);

	/* Note that use_builtin_virtio_net is not affected by this function
	 * since callers may want to selectively disable features of the
	 * built-in vhost net device backend.
	 */

	if (vsocket)
		vsocket->features &= ~features;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_enable_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket) {
		if ((vsocket->supported_features & features) != features) {
			/*
			 * Trying to enable features the driver doesn't
			 * support.
			 */
			pthread_mutex_unlock(&vhost_user.mutex);
			return -1;
		}
		vsocket->features |= features;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_set_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket) {
		vsocket->supported_features = features;
		vsocket->features = features;

		/* Anyone setting feature bits is implementing their own vhost
		 * device backend.
		 */
		vsocket->use_builtin_virtio_net = false;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_get_features(const char *path, uint64_t *features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_features;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_LOG_CONFIG(ERR,
			"socket file %s is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*features = vsocket->features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) {
		VHOST_LOG_CONFIG(ERR,
				"failed to get vdpa features "
				"for socket file %s.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	*features = vsocket->features & vdpa_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

int
rte_vhost_driver_set_protocol_features(const char *path,
		uint64_t protocol_features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->protocol_features = protocol_features;
	pthread_mutex_unlock(&vhost_user.mutex);
	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_get_protocol_features(const char *path,
		uint64_t *protocol_features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_protocol_features;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_LOG_CONFIG(ERR,
			"socket file %s is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*protocol_features = vsocket->protocol_features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_protocol_features(vdpa_dev,
				&vdpa_protocol_features) < 0) {
		VHOST_LOG_CONFIG(ERR,
				"failed to get vdpa protocol features "
				"for socket file %s.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	*protocol_features = vsocket->protocol_features
		& vdpa_protocol_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

int
rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
{
	struct vhost_user_socket *vsocket;
	uint32_t vdpa_queue_num;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_LOG_CONFIG(ERR,
			"socket file %s is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*queue_num = VHOST_MAX_QUEUE_PAIRS;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) {
		VHOST_LOG_CONFIG(ERR,
				"failed to get vdpa queue number "
				"for socket file %s.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	*queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num);

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

static void
vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
{
	if (vsocket == NULL)
		return;

	free(vsocket->path);
	vsocket->path = NULL;
	free(vsocket);
}

/*
 * Register a new vhost-user socket; here we could act as a server
 * (the default case), or as a client (when the RTE_VHOST_USER_CLIENT
 * flag is set).
 */
int
rte_vhost_driver_register(const char *path, uint64_t flags)
{
	int ret = -1;
	struct vhost_user_socket *vsocket;

	if (!path)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);

	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
		VHOST_LOG_CONFIG(ERR,
			"error: the number of vhost sockets reached the maximum\n");
		goto out;
	}

	vsocket = malloc(sizeof(struct vhost_user_socket));
	if (!vsocket)
		goto out;
	memset(vsocket, 0, sizeof(struct vhost_user_socket));
	vsocket->path = strdup(path);
	if (vsocket->path == NULL) {
		VHOST_LOG_CONFIG(ERR,
			"error: failed to copy socket path string\n");
		vhost_user_socket_mem_free(vsocket);
		goto out;
	}
	TAILQ_INIT(&vsocket->conn_list);
	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
	if (ret) {
		VHOST_LOG_CONFIG(ERR,
			"error: failed to init connection mutex\n");
		goto out_free;
	}
	vsocket->vdpa_dev = NULL;
	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
	vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;

	if (vsocket->async_copy &&
		(flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
		RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
		VHOST_LOG_CONFIG(ERR, "error: enabling async copy and IOMMU "
			"or post-copy feature simultaneously is not "
			"supported\n");
		goto out_mutex;
	}

	/*
	 * Set the supported features correctly for the builtin vhost-user
	 * net driver.
	 *
	 * Applications know nothing about features the builtin virtio net
	 * driver (virtio_net.c) supports, thus it's not possible for them
	 * to invoke rte_vhost_driver_set_features(). To work around it,
	 * here we set it unconditionally. If the application wants to
	 * implement another vhost-user driver (say SCSI), it should call
	 * rte_vhost_driver_set_features(), which will overwrite the
	 * following two values.
	 */
	vsocket->use_builtin_virtio_net = true;
	vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->features           = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->protocol_features  = VHOST_USER_PROTOCOL_FEATURES;

	if (vsocket->async_copy) {
		vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
		vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
		VHOST_LOG_CONFIG(INFO,
			"Logging feature is disabled in async copy mode\n");
	}

	/*
	 * We will not be able to receive a buffer from the guest in linear
	 * mode without an external buffer if it does not fit in a single
	 * mbuf, which is likely if segmentation offloading is enabled.
	 */
	if (vsocket->linearbuf && !vsocket->extbuf) {
		uint64_t seg_offload_features =
				(1ULL << VIRTIO_NET_F_HOST_TSO4) |
				(1ULL << VIRTIO_NET_F_HOST_TSO6) |
				(1ULL << VIRTIO_NET_F_HOST_UFO);

		VHOST_LOG_CONFIG(INFO,
			"Linear buffers requested without external buffers, "
			"disabling host segmentation offloading support\n");
		vsocket->supported_features &= ~seg_offload_features;
		vsocket->features &= ~seg_offload_features;
	}

	if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
	}

	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
		vsocket->protocol_features &=
			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
	} else {
#ifndef RTE_LIBRTE_VHOST_POSTCOPY
		VHOST_LOG_CONFIG(ERR,
			"Postcopy requested but not compiled\n");
		ret = -1;
		goto out_mutex;
#endif
	}

	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
		if (vsocket->reconnect && reconn_tid == 0) {
			if (vhost_user_reconnect_init() != 0)
				goto out_mutex;
		}
	} else {
		vsocket->is_server = true;
	}
	ret = create_unix_socket(vsocket);
	if (ret < 0)
		goto out_mutex;

	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;

	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;

out_mutex:
	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
		VHOST_LOG_CONFIG(ERR,
			"error: failed to destroy connection mutex\n");
	}
out_free:
	vhost_user_socket_mem_free(vsocket);
out:
	pthread_mutex_unlock(&vhost_user.mutex);

	return ret;
}
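
/*
 * Typical application flow (an illustrative sketch; the socket path
 * and the ops table are placeholders chosen for the example):
 *
 *	rte_vhost_driver_register("/tmp/vhost.sock", 0);
 *	rte_vhost_driver_callback_register("/tmp/vhost.sock", &ops);
 *	rte_vhost_driver_start("/tmp/vhost.sock");
 */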

static bool
vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
{
	bool found = false;
	struct vhost_user_reconnect *reconn, *next;

	pthread_mutex_lock(&reconn_list.mutex);

	for (reconn = TAILQ_FIRST(&reconn_list.head);
	     reconn != NULL; reconn = next) {
		next = TAILQ_NEXT(reconn, next);

		if (reconn->vsocket == vsocket) {
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			close(reconn->fd);
			free(reconn);
			found = true;
			break;
		}
	}
	pthread_mutex_unlock(&reconn_list.mutex);
	return found;
}

/**
 * Unregister the specified vhost socket
 */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;
	struct vhost_user_connection *conn, *next;

	if (path == NULL)
		return -1;

again:
	pthread_mutex_lock(&vhost_user.mutex);

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (!strcmp(vsocket->path, path)) {
			pthread_mutex_lock(&vsocket->conn_mutex);
			for (conn = TAILQ_FIRST(&vsocket->conn_list);
			     conn != NULL;
			     conn = next) {
				next = TAILQ_NEXT(conn, next);

				/*
				 * If the read/write callback is executing,
				 * release vsocket's conn_mutex and
				 * vhost_user's mutex and try again, since
				 * the callback may use both locks.
				 */
				if (fdset_try_del(&vhost_user.fdset,
						  conn->connfd) == -1) {
					pthread_mutex_unlock(
							&vsocket->conn_mutex);
					pthread_mutex_unlock(&vhost_user.mutex);
					goto again;
				}

				VHOST_LOG_CONFIG(INFO,
					"free connfd = %d for device '%s'\n",
					conn->connfd, path);
				close(conn->connfd);
				vhost_destroy_device(conn->vid);
				TAILQ_REMOVE(&vsocket->conn_list, conn, next);
				free(conn);
			}
			pthread_mutex_unlock(&vsocket->conn_mutex);

			if (vsocket->is_server) {
				/*
				 * If the read/write callback is executing,
				 * release vhost_user's mutex and try again,
				 * since the callback may use the lock.
				 */
				if (fdset_try_del(&vhost_user.fdset,
						vsocket->socket_fd) == -1) {
					pthread_mutex_unlock(&vhost_user.mutex);
					goto again;
				}

				close(vsocket->socket_fd);
				unlink(path);
			} else if (vsocket->reconnect) {
				vhost_user_remove_reconnect(vsocket);
			}

			pthread_mutex_destroy(&vsocket->conn_mutex);
			vhost_user_socket_mem_free(vsocket);

			count = --vhost_user.vsocket_cnt;
			vhost_user.vsockets[i] = vhost_user.vsockets[count];
			vhost_user.vsockets[count] = NULL;
			pthread_mutex_unlock(&vhost_user.mutex);

			return 0;
		}
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return -1;
}

/*
 * Register ops so that we can add/remove devices to/from the data core.
 */
int
rte_vhost_driver_callback_register(const char *path,
	struct vhost_device_ops const * const ops)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->notify_ops = ops;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

struct vhost_device_ops const *
vhost_driver_callback_get(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? vsocket->notify_ops : NULL;
}

int
rte_vhost_driver_start(const char *path)
{
	struct vhost_user_socket *vsocket;
	static pthread_t fdset_tid;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	if (!vsocket)
		return -1;

	if (fdset_tid == 0) {
		/*
		 * Create a pipe that is polled along with the other fds,
		 * and notified whenever the wait list of poll needs to
		 * be rebuilt.
		 */
		if (fdset_pipe_init(&vhost_user.fdset) < 0) {
			VHOST_LOG_CONFIG(ERR,
				"failed to create pipe for vhost fdset\n");
			return -1;
		}

		int ret = rte_ctrl_thread_create(&fdset_tid,
			"vhost-events", NULL, fdset_event_dispatch,
			&vhost_user.fdset);
		if (ret != 0) {
			VHOST_LOG_CONFIG(ERR,
				"failed to create fdset handling thread");

			fdset_pipe_uninit(&vhost_user.fdset);
			return -1;
		}
	}

	if (vsocket->is_server)
		return vhost_user_start_server(vsocket);
	else
		return vhost_user_start_client(vsocket);
}