qemu/tests/vhost-user-bridge.c
<<
>>
Prefs
   1/*
   2 * Vhost User Bridge
   3 *
   4 * Copyright (c) 2015 Red Hat, Inc.
   5 *
   6 * Authors:
   7 *  Victor Kaplansky <victork@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or
  10 * later.  See the COPYING file in the top-level directory.
  11 */
  12
  13/*
  14 * TODO:
  15 *     - main should get parameters from the command line.
  16 *     - implement all request handlers. Still not implemented:
  17 *          vubr_get_queue_num_exec()
  18 *          vubr_send_rarp_exec()
  19 *     - test for broken requests and virtqueue.
  20 *     - implement features defined by Virtio 1.0 spec.
  21 *     - support mergeable buffers and indirect descriptors.
  22 *     - implement clean shutdown.
  23 *     - implement non-blocking writes to UDP backend.
  24 *     - implement polling strategy.
  25 *     - implement clean starting/stopping of vq processing
 *     - implement clean starting/stopping of dirty page logging for
 *       the used ring and buffers.
  28 */
  29
  30#define _FILE_OFFSET_BITS 64
  31
  32#include "qemu/osdep.h"
  33#include "qemu/iov.h"
  34#include "standard-headers/linux/virtio_net.h"
  35#include "contrib/libvhost-user/libvhost-user.h"
  36
/* Compile-time switch for debug output: non-zero enables DPRINT() and the
 * TX hexdump, zero disables them.
 */
#define VHOST_USER_BRIDGE_DEBUG 1

/*
 * Debug trace helper: forwards its arguments to printf() when
 * VHOST_USER_BRIDGE_DEBUG is non-zero.  The do/while (0) wrapper lets the
 * macro be used as a single statement.
 */
#define DPRINT(...) \
    do { \
        if (VHOST_USER_BRIDGE_DEBUG) { \
            printf(__VA_ARGS__); \
        } \
    } while (0)
  45
/* Signature of a dispatcher callback: receives the ready descriptor and the
 * opaque context pointer that was registered together with it.
 */
typedef void (*CallbackFunc)(int sock, void *ctx);
  47
/* One dispatcher registration: the callback to run for a descriptor and the
 * context pointer handed back to it.
 */
typedef struct Event {
    void *ctx;              /* opaque argument passed to callback */
    CallbackFunc callback;  /* invoked when the descriptor is readable */
} Event;
  52
/* Minimal select()-based event loop state. */
typedef struct Dispatcher {
    int max_sock;             /* highest descriptor ever added, -1 if none */
    fd_set fdset;             /* descriptors currently watched for reading */
    Event events[FD_SETSIZE]; /* registrations, indexed by descriptor */
} Dispatcher;
  58
/* State of one bridge instance.  The VuDev is embedded so that library
 * callbacks can recover the VubrDev with container_of().
 */
typedef struct VubrDev {
    VuDev vudev;                         /* libvhost-user device state */
    Dispatcher dispatcher;               /* event loop for all descriptors */
    int backend_udp_sock;                /* UDP socket used as the backend */
    struct sockaddr_in backend_udp_dest; /* destination for transmitted data */
    int hdrlen;                          /* virtio-net header size: 10 or 12 */
    int sock;                            /* UNIX socket created by vubr_new() */
    int ready;                           /* not referenced in this file */
    int quit;                            /* set non-zero to stop vubr_run() */
} VubrDev;
  69
  70static void
  71vubr_die(const char *s)
  72{
  73    perror(s);
  74    exit(1);
  75}
  76
  77static int
  78dispatcher_init(Dispatcher *dispr)
  79{
  80    FD_ZERO(&dispr->fdset);
  81    dispr->max_sock = -1;
  82    return 0;
  83}
  84
  85static int
  86dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
  87{
  88    if (sock >= FD_SETSIZE) {
  89        fprintf(stderr,
  90                "Error: Failed to add new event. sock %d should be less than %d\n",
  91                sock, FD_SETSIZE);
  92        return -1;
  93    }
  94
  95    dispr->events[sock].ctx = ctx;
  96    dispr->events[sock].callback = cb;
  97
  98    FD_SET(sock, &dispr->fdset);
  99    if (sock > dispr->max_sock) {
 100        dispr->max_sock = sock;
 101    }
 102    DPRINT("Added sock %d for watching. max_sock: %d\n",
 103           sock, dispr->max_sock);
 104    return 0;
 105}
 106
 107static int
 108dispatcher_remove(Dispatcher *dispr, int sock)
 109{
 110    if (sock >= FD_SETSIZE) {
 111        fprintf(stderr,
 112                "Error: Failed to remove event. sock %d should be less than %d\n",
 113                sock, FD_SETSIZE);
 114        return -1;
 115    }
 116
 117    FD_CLR(sock, &dispr->fdset);
 118    DPRINT("Sock %d removed from dispatcher watch.\n", sock);
 119    return 0;
 120}
 121
 122/* timeout in us */
 123static int
 124dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
 125{
 126    struct timeval tv;
 127    tv.tv_sec = timeout / 1000000;
 128    tv.tv_usec = timeout % 1000000;
 129
 130    fd_set fdset = dispr->fdset;
 131
 132    /* wait until some of sockets become readable. */
 133    int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);
 134
 135    if (rc == -1) {
 136        vubr_die("select");
 137    }
 138
 139    /* Timeout */
 140    if (rc == 0) {
 141        return 0;
 142    }
 143
 144    /* Now call callback for every ready socket. */
 145
 146    int sock;
 147    for (sock = 0; sock < dispr->max_sock + 1; sock++) {
 148        /* The callback on a socket can remove other sockets from the
 149         * dispatcher, thus we have to check that the socket is
 150         * still not removed from dispatcher's list
 151         */
 152        if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) {
 153            Event *e = &dispr->events[sock];
 154            e->callback(sock, e->ctx);
 155        }
 156    }
 157
 158    return 0;
 159}
 160
 161static void
 162vubr_handle_tx(VuDev *dev, int qidx)
 163{
 164    VuVirtq *vq = vu_get_queue(dev, qidx);
 165    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 166    int hdrlen = vubr->hdrlen;
 167    VuVirtqElement *elem = NULL;
 168
 169    assert(qidx % 2);
 170
 171    for (;;) {
 172        ssize_t ret;
 173        unsigned int out_num;
 174        struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg;
 175
 176        elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
 177        if (!elem) {
 178            break;
 179        }
 180
 181        out_num = elem->out_num;
 182        out_sg = elem->out_sg;
 183        if (out_num < 1) {
 184            fprintf(stderr, "virtio-net header not in first element\n");
 185            break;
 186        }
 187        if (VHOST_USER_BRIDGE_DEBUG) {
 188            iov_hexdump(out_sg, out_num, stderr, "TX:", 1024);
 189        }
 190
 191        if (hdrlen) {
 192            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
 193                                       out_sg, out_num,
 194                                       hdrlen, -1);
 195            out_num = sg_num;
 196            out_sg = sg;
 197        }
 198
 199        struct msghdr msg = {
 200            .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
 201            .msg_namelen = sizeof(struct sockaddr_in),
 202            .msg_iov = out_sg,
 203            .msg_iovlen = out_num,
 204        };
 205        do {
 206            ret = sendmsg(vubr->backend_udp_sock, &msg, 0);
 207        } while (ret == -1 && (errno == EAGAIN || errno == EINTR));
 208
 209        if (ret == -1) {
 210            vubr_die("sendmsg()");
 211        }
 212
 213        vu_queue_push(dev, vq, elem, 0);
 214        vu_queue_notify(dev, vq);
 215
 216        free(elem);
 217        elem = NULL;
 218    }
 219
 220    free(elem);
 221}
 222
 223
 224/* this function reverse the effect of iov_discard_front() it must be
 225 * called with 'front' being the original struct iovec and 'bytes'
 226 * being the number of bytes you shaved off
 227 */
 228static void
 229iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
 230{
 231    struct iovec *cur;
 232
 233    for (cur = front; cur != iov; cur++) {
 234        assert(bytes >= cur->iov_len);
 235        bytes -= cur->iov_len;
 236    }
 237
 238    cur->iov_base -= bytes;
 239    cur->iov_len += bytes;
 240}
 241
 242static void
 243iov_truncate(struct iovec *iov, unsigned iovc, size_t bytes)
 244{
 245    unsigned i;
 246
 247    for (i = 0; i < iovc; i++, iov++) {
 248        if (bytes < iov->iov_len) {
 249            iov->iov_len = bytes;
 250            return;
 251        }
 252
 253        bytes -= iov->iov_len;
 254    }
 255
 256    assert(!"couldn't truncate iov");
 257}
 258
 259static void
 260vubr_backend_recv_cb(int sock, void *ctx)
 261{
 262    VubrDev *vubr = (VubrDev *) ctx;
 263    VuDev *dev = &vubr->vudev;
 264    VuVirtq *vq = vu_get_queue(dev, 0);
 265    VuVirtqElement *elem = NULL;
 266    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
 267    struct virtio_net_hdr_mrg_rxbuf mhdr;
 268    unsigned mhdr_cnt = 0;
 269    int hdrlen = vubr->hdrlen;
 270    int i = 0;
 271    struct virtio_net_hdr hdr = {
 272        .flags = 0,
 273        .gso_type = VIRTIO_NET_HDR_GSO_NONE
 274    };
 275
 276    DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
 277    DPRINT("    hdrlen = %d\n", hdrlen);
 278
 279    if (!vu_queue_enabled(dev, vq) ||
 280        !vu_queue_avail_bytes(dev, vq, hdrlen, 0)) {
 281        DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
 282        return;
 283    }
 284
 285    do {
 286        struct iovec *sg;
 287        ssize_t ret, total = 0;
 288        unsigned int num;
 289
 290        elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
 291        if (!elem) {
 292            break;
 293        }
 294
 295        if (elem->in_num < 1) {
 296            fprintf(stderr, "virtio-net contains no in buffers\n");
 297            break;
 298        }
 299
 300        sg = elem->in_sg;
 301        num = elem->in_num;
 302        if (i == 0) {
 303            if (hdrlen == 12) {
 304                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
 305                                    sg, elem->in_num,
 306                                    offsetof(typeof(mhdr), num_buffers),
 307                                    sizeof(mhdr.num_buffers));
 308            }
 309            iov_from_buf(sg, elem->in_num, 0, &hdr, sizeof hdr);
 310            total += hdrlen;
 311            ret = iov_discard_front(&sg, &num, hdrlen);
 312            assert(ret == hdrlen);
 313        }
 314
 315        struct msghdr msg = {
 316            .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
 317            .msg_namelen = sizeof(struct sockaddr_in),
 318            .msg_iov = sg,
 319            .msg_iovlen = elem->in_num,
 320            .msg_flags = MSG_DONTWAIT,
 321        };
 322        do {
 323            ret = recvmsg(vubr->backend_udp_sock, &msg, 0);
 324        } while (ret == -1 && (errno == EINTR));
 325
 326        if (i == 0) {
 327            iov_restore_front(elem->in_sg, sg, hdrlen);
 328        }
 329
 330        if (ret == -1) {
 331            if (errno == EWOULDBLOCK) {
 332                vu_queue_rewind(dev, vq, 1);
 333                break;
 334            }
 335
 336            vubr_die("recvmsg()");
 337        }
 338
 339        total += ret;
 340        iov_truncate(elem->in_sg, elem->in_num, total);
 341        vu_queue_fill(dev, vq, elem, total, i++);
 342
 343        free(elem);
 344        elem = NULL;
 345    } while (false); /* could loop if DONTWAIT worked? */
 346
 347    if (mhdr_cnt) {
 348        mhdr.num_buffers = i;
 349        iov_from_buf(mhdr_sg, mhdr_cnt,
 350                     0,
 351                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
 352    }
 353
 354    vu_queue_flush(dev, vq, i);
 355    vu_queue_notify(dev, vq);
 356
 357    free(elem);
 358}
 359
 360static void
 361vubr_receive_cb(int sock, void *ctx)
 362{
 363    VubrDev *vubr = (VubrDev *)ctx;
 364
 365    if (!vu_dispatch(&vubr->vudev)) {
 366        fprintf(stderr, "Error while dispatching\n");
 367    }
 368}
 369
/* Arguments remembered for a library-requested watch so that watch_cb()
 * can forward a dispatcher event to the library callback.
 */
typedef struct WatchData {
    VuDev *dev;      /* device the watch belongs to */
    vu_watch_cb cb;  /* library callback to invoke */
    void *data;      /* opaque argument for cb */
} WatchData;
 375
 376static void
 377watch_cb(int sock, void *ctx)
 378{
 379    struct WatchData *wd = ctx;
 380
 381    wd->cb(wd->dev, VU_WATCH_IN, wd->data);
 382}
 383
 384static void
 385vubr_set_watch(VuDev *dev, int fd, int condition,
 386               vu_watch_cb cb, void *data)
 387{
 388    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 389    static WatchData watches[FD_SETSIZE];
 390    struct WatchData *wd = &watches[fd];
 391
 392    wd->cb = cb;
 393    wd->data = data;
 394    wd->dev = dev;
 395    dispatcher_add(&vubr->dispatcher, fd, wd, watch_cb);
 396}
 397
 398static void
 399vubr_remove_watch(VuDev *dev, int fd)
 400{
 401    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 402
 403    dispatcher_remove(&vubr->dispatcher, fd);
 404}
 405
 406static int
 407vubr_send_rarp_exec(VuDev *dev, VhostUserMsg *vmsg)
 408{
 409    DPRINT("Function %s() not implemented yet.\n", __func__);
 410    return 0;
 411}
 412
 413static int
 414vubr_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
 415{
 416    switch (vmsg->request) {
 417    case VHOST_USER_SEND_RARP:
 418        *do_reply = vubr_send_rarp_exec(dev, vmsg);
 419        return 1;
 420    default:
 421        /* let the library handle the rest */
 422        return 0;
 423    }
 424
 425    return 0;
 426}
 427
 428static void
 429vubr_set_features(VuDev *dev, uint64_t features)
 430{
 431    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 432
 433    if ((features & (1ULL << VIRTIO_F_VERSION_1)) ||
 434        (features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
 435        vubr->hdrlen = 12;
 436    } else {
 437        vubr->hdrlen = 10;
 438    }
 439}
 440
 441static uint64_t
 442vubr_get_features(VuDev *dev)
 443{
 444    return 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |
 445        1ULL << VIRTIO_NET_F_MRG_RXBUF;
 446}
 447
 448static void
 449vubr_queue_set_started(VuDev *dev, int qidx, bool started)
 450{
 451    VuVirtq *vq = vu_get_queue(dev, qidx);
 452
 453    if (qidx % 2 == 1) {
 454        vu_set_queue_handler(dev, vq, started ? vubr_handle_tx : NULL);
 455    }
 456}
 457
 458static void
 459vubr_panic(VuDev *dev, const char *msg)
 460{
 461    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 462
 463    fprintf(stderr, "PANIC: %s\n", msg);
 464
 465    dispatcher_remove(&vubr->dispatcher, dev->sock);
 466    vubr->quit = 1;
 467}
 468
 469static bool
 470vubr_queue_is_processed_in_order(VuDev *dev, int qidx)
 471{
 472    return true;
 473}
 474
 475static const VuDevIface vuiface = {
 476    .get_features = vubr_get_features,
 477    .set_features = vubr_set_features,
 478    .process_msg = vubr_process_msg,
 479    .queue_set_started = vubr_queue_set_started,
 480    .queue_is_processed_in_order = vubr_queue_is_processed_in_order,
 481};
 482
 483static void
 484vubr_accept_cb(int sock, void *ctx)
 485{
 486    VubrDev *dev = (VubrDev *)ctx;
 487    int conn_fd;
 488    struct sockaddr_un un;
 489    socklen_t len = sizeof(un);
 490
 491    conn_fd = accept(sock, (struct sockaddr *) &un, &len);
 492    if (conn_fd == -1) {
 493        vubr_die("accept()");
 494    }
 495    DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
 496
 497    vu_init(&dev->vudev,
 498            conn_fd,
 499            vubr_panic,
 500            vubr_set_watch,
 501            vubr_remove_watch,
 502            &vuiface);
 503
 504    dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
 505    dispatcher_remove(&dev->dispatcher, sock);
 506}
 507
 508static VubrDev *
 509vubr_new(const char *path, bool client)
 510{
 511    VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
 512    struct sockaddr_un un;
 513    CallbackFunc cb;
 514    size_t len;
 515
 516    /* Get a UNIX socket. */
 517    dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
 518    if (dev->sock == -1) {
 519        vubr_die("socket");
 520    }
 521
 522    un.sun_family = AF_UNIX;
 523    strcpy(un.sun_path, path);
 524    len = sizeof(un.sun_family) + strlen(path);
 525
 526    if (!client) {
 527        unlink(path);
 528
 529        if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
 530            vubr_die("bind");
 531        }
 532
 533        if (listen(dev->sock, 1) == -1) {
 534            vubr_die("listen");
 535        }
 536        cb = vubr_accept_cb;
 537
 538        DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
 539    } else {
 540        if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
 541            vubr_die("connect");
 542        }
 543        vu_init(&dev->vudev,
 544                dev->sock,
 545                vubr_panic,
 546                vubr_set_watch,
 547                vubr_remove_watch,
 548                &vuiface);
 549        cb = vubr_receive_cb;
 550    }
 551
 552    dispatcher_init(&dev->dispatcher);
 553
 554    dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);
 555
 556    return dev;
 557}
 558
 559static void
 560vubr_set_host(struct sockaddr_in *saddr, const char *host)
 561{
 562    if (isdigit(host[0])) {
 563        if (!inet_aton(host, &saddr->sin_addr)) {
 564            fprintf(stderr, "inet_aton() failed.\n");
 565            exit(1);
 566        }
 567    } else {
 568        struct hostent *he = gethostbyname(host);
 569
 570        if (!he) {
 571            fprintf(stderr, "gethostbyname() failed.\n");
 572            exit(1);
 573        }
 574        saddr->sin_addr = *(struct in_addr *)he->h_addr;
 575    }
 576}
 577
 578static void
 579vubr_backend_udp_setup(VubrDev *dev,
 580                       const char *local_host,
 581                       const char *local_port,
 582                       const char *remote_host,
 583                       const char *remote_port)
 584{
 585    int sock;
 586    const char *r;
 587
 588    int lport, rport;
 589
 590    lport = strtol(local_port, (char **)&r, 0);
 591    if (r == local_port) {
 592        fprintf(stderr, "lport parsing failed.\n");
 593        exit(1);
 594    }
 595
 596    rport = strtol(remote_port, (char **)&r, 0);
 597    if (r == remote_port) {
 598        fprintf(stderr, "rport parsing failed.\n");
 599        exit(1);
 600    }
 601
 602    struct sockaddr_in si_local = {
 603        .sin_family = AF_INET,
 604        .sin_port = htons(lport),
 605    };
 606
 607    vubr_set_host(&si_local, local_host);
 608
 609    /* setup destination for sends */
 610    dev->backend_udp_dest = (struct sockaddr_in) {
 611        .sin_family = AF_INET,
 612        .sin_port = htons(rport),
 613    };
 614    vubr_set_host(&dev->backend_udp_dest, remote_host);
 615
 616    sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
 617    if (sock == -1) {
 618        vubr_die("socket");
 619    }
 620
 621    if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
 622        vubr_die("bind");
 623    }
 624
 625    dev->backend_udp_sock = sock;
 626    dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
 627    DPRINT("Waiting for data from udp backend on %s:%d...\n",
 628           local_host, lport);
 629}
 630
 631static void
 632vubr_run(VubrDev *dev)
 633{
 634    while (!dev->quit) {
 635        /* timeout 200ms */
 636        dispatcher_wait(&dev->dispatcher, 200000);
 637        /* Here one can try polling strategy. */
 638    }
 639}
 640
 641static int
 642vubr_parse_host_port(const char **host, const char **port, const char *buf)
 643{
 644    char *p = strchr(buf, ':');
 645
 646    if (!p) {
 647        return -1;
 648    }
 649    *p = '\0';
 650    *host = strdup(buf);
 651    *port = strdup(p + 1);
 652    return 0;
 653}
 654
/* Built-in defaults, also shown in the usage text printed by main(). */
#define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
#define DEFAULT_LHOST "127.0.0.1"
#define DEFAULT_LPORT "4444"
#define DEFAULT_RHOST "127.0.0.1"
#define DEFAULT_RPORT "5555"

/* Effective settings: start out as the defaults and are replaced by main()
 * when the -u, -l and -r options are given.
 */
static const char *ud_socket_path = DEFAULT_UD_SOCKET;
static const char *lhost = DEFAULT_LHOST;
static const char *lport = DEFAULT_LPORT;
static const char *rhost = DEFAULT_RHOST;
static const char *rport = DEFAULT_RPORT;
 666
 667int
 668main(int argc, char *argv[])
 669{
 670    VubrDev *dev;
 671    int opt;
 672    bool client = false;
 673
 674    while ((opt = getopt(argc, argv, "l:r:u:c")) != -1) {
 675
 676        switch (opt) {
 677        case 'l':
 678            if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) {
 679                goto out;
 680            }
 681            break;
 682        case 'r':
 683            if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) {
 684                goto out;
 685            }
 686            break;
 687        case 'u':
 688            ud_socket_path = strdup(optarg);
 689            break;
 690        case 'c':
 691            client = true;
 692            break;
 693        default:
 694            goto out;
 695        }
 696    }
 697
 698    DPRINT("ud socket: %s (%s)\n", ud_socket_path,
 699           client ? "client" : "server");
 700    DPRINT("local:     %s:%s\n", lhost, lport);
 701    DPRINT("remote:    %s:%s\n", rhost, rport);
 702
 703    dev = vubr_new(ud_socket_path, client);
 704    if (!dev) {
 705        return 1;
 706    }
 707
 708    vubr_backend_udp_setup(dev, lhost, lport, rhost, rport);
 709    vubr_run(dev);
 710
 711    vu_deinit(&dev->vudev);
 712
 713    return 0;
 714
 715out:
 716    fprintf(stderr, "Usage: %s ", argv[0]);
 717    fprintf(stderr, "[-c] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n");
 718    fprintf(stderr, "\t-u path to unix doman socket. default: %s\n",
 719            DEFAULT_UD_SOCKET);
 720    fprintf(stderr, "\t-l local host and port. default: %s:%s\n",
 721            DEFAULT_LHOST, DEFAULT_LPORT);
 722    fprintf(stderr, "\t-r remote host and port. default: %s:%s\n",
 723            DEFAULT_RHOST, DEFAULT_RPORT);
 724    fprintf(stderr, "\t-c client mode\n");
 725
 726    return 1;
 727}
 728