/* qemu/tests/vhost-user-bridge.c */
/*
 * Vhost User Bridge
 *
 * Copyright (c) 2015 Red Hat, Inc.
 *
 * Authors:
 *  Victor Kaplansky <victork@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
  12
/*
 * TODO:
 *     - main should get parameters from the command line.
 *     - implement all request handlers. Still not implemented:
 *          vubr_get_queue_num_exec()
 *          vubr_send_rarp_exec()
 *     - test for broken requests and virtqueue.
 *     - implement features defined by Virtio 1.0 spec.
 *     - support mergeable buffers and indirect descriptors.
 *     - implement clean shutdown.
 *     - implement non-blocking writes to UDP backend.
 *     - implement polling strategy.
 *     - implement clean starting/stopping of vq processing.
 *     - implement clean starting/stopping of dirty page logging for the
 *       used ring and the buffers.
 */
  29
#define _FILE_OFFSET_BITS 64

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "standard-headers/linux/virtio_net.h"
#include "contrib/libvhost-user/libvhost-user.h"
  36
  37#define VHOST_USER_BRIDGE_DEBUG 1
  38
  39#define DPRINT(...) \
  40    do { \
  41        if (VHOST_USER_BRIDGE_DEBUG) { \
  42            printf(__VA_ARGS__); \
  43        } \
  44    } while (0)
  45
  46typedef void (*CallbackFunc)(int sock, void *ctx);
  47
  48typedef struct Event {
  49    void *ctx;
  50    CallbackFunc callback;
  51} Event;
  52
  53typedef struct Dispatcher {
  54    int max_sock;
  55    fd_set fdset;
  56    Event events[FD_SETSIZE];
  57} Dispatcher;
  58
  59typedef struct VubrDev {
  60    VuDev vudev;
  61    Dispatcher dispatcher;
  62    int backend_udp_sock;
  63    struct sockaddr_in backend_udp_dest;
  64    int hdrlen;
  65    int sock;
  66    int ready;
  67    int quit;
  68} VubrDev;
  69
  70static void
  71vubr_die(const char *s)
  72{
  73    perror(s);
  74    exit(1);
  75}
  76
  77static int
  78dispatcher_init(Dispatcher *dispr)
  79{
  80    FD_ZERO(&dispr->fdset);
  81    dispr->max_sock = -1;
  82    return 0;
  83}
  84
  85static int
  86dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
  87{
  88    if (sock >= FD_SETSIZE) {
  89        fprintf(stderr,
  90                "Error: Failed to add new event. sock %d should be less than %d\n",
  91                sock, FD_SETSIZE);
  92        return -1;
  93    }
  94
  95    dispr->events[sock].ctx = ctx;
  96    dispr->events[sock].callback = cb;
  97
  98    FD_SET(sock, &dispr->fdset);
  99    if (sock > dispr->max_sock) {
 100        dispr->max_sock = sock;
 101    }
 102    DPRINT("Added sock %d for watching. max_sock: %d\n",
 103           sock, dispr->max_sock);
 104    return 0;
 105}
 106
 107static int
 108dispatcher_remove(Dispatcher *dispr, int sock)
 109{
 110    if (sock >= FD_SETSIZE) {
 111        fprintf(stderr,
 112                "Error: Failed to remove event. sock %d should be less than %d\n",
 113                sock, FD_SETSIZE);
 114        return -1;
 115    }
 116
 117    FD_CLR(sock, &dispr->fdset);
 118    DPRINT("Sock %d removed from dispatcher watch.\n", sock);
 119    return 0;
 120}
 121
 122/* timeout in us */
 123static int
 124dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
 125{
 126    struct timeval tv;
 127    tv.tv_sec = timeout / 1000000;
 128    tv.tv_usec = timeout % 1000000;
 129
 130    fd_set fdset = dispr->fdset;
 131
 132    /* wait until some of sockets become readable. */
 133    int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);
 134
 135    if (rc == -1) {
 136        vubr_die("select");
 137    }
 138
 139    /* Timeout */
 140    if (rc == 0) {
 141        return 0;
 142    }
 143
 144    /* Now call callback for every ready socket. */
 145
 146    int sock;
 147    for (sock = 0; sock < dispr->max_sock + 1; sock++) {
 148        /* The callback on a socket can remove other sockets from the
 149         * dispatcher, thus we have to check that the socket is
 150         * still not removed from dispatcher's list
 151         */
 152        if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) {
 153            Event *e = &dispr->events[sock];
 154            e->callback(sock, e->ctx);
 155        }
 156    }
 157
 158    return 0;
 159}
 160
 161static void
 162vubr_handle_tx(VuDev *dev, int qidx)
 163{
 164    VuVirtq *vq = vu_get_queue(dev, qidx);
 165    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 166    int hdrlen = vubr->hdrlen;
 167    VuVirtqElement *elem = NULL;
 168
 169    assert(qidx % 2);
 170
 171    for (;;) {
 172        ssize_t ret;
 173        unsigned int out_num;
 174        struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg;
 175
 176        elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
 177        if (!elem) {
 178            break;
 179        }
 180
 181        out_num = elem->out_num;
 182        out_sg = elem->out_sg;
 183        if (out_num < 1) {
 184            fprintf(stderr, "virtio-net header not in first element\n");
 185            break;
 186        }
 187        if (VHOST_USER_BRIDGE_DEBUG) {
 188            iov_hexdump(out_sg, out_num, stderr, "TX:", 1024);
 189        }
 190
 191        if (hdrlen) {
 192            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
 193                                       out_sg, out_num,
 194                                       hdrlen, -1);
 195            out_num = sg_num;
 196            out_sg = sg;
 197        }
 198
 199        struct msghdr msg = {
 200            .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
 201            .msg_namelen = sizeof(struct sockaddr_in),
 202            .msg_iov = out_sg,
 203            .msg_iovlen = out_num,
 204        };
 205        do {
 206            ret = sendmsg(vubr->backend_udp_sock, &msg, 0);
 207        } while (ret == -1 && (errno == EAGAIN || errno == EINTR));
 208
 209        if (ret == -1) {
 210            vubr_die("sendmsg()");
 211        }
 212
 213        vu_queue_push(dev, vq, elem, 0);
 214        vu_queue_notify(dev, vq);
 215
 216        free(elem);
 217        elem = NULL;
 218    }
 219
 220    free(elem);
 221}
 222
 223
 224/* this function reverse the effect of iov_discard_front() it must be
 225 * called with 'front' being the original struct iovec and 'bytes'
 226 * being the number of bytes you shaved off
 227 */
 228static void
 229iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
 230{
 231    struct iovec *cur;
 232
 233    for (cur = front; cur != iov; cur++) {
 234        assert(bytes >= cur->iov_len);
 235        bytes -= cur->iov_len;
 236    }
 237
 238    cur->iov_base -= bytes;
 239    cur->iov_len += bytes;
 240}
 241
 242static void
 243iov_truncate(struct iovec *iov, unsigned iovc, size_t bytes)
 244{
 245    unsigned i;
 246
 247    for (i = 0; i < iovc; i++, iov++) {
 248        if (bytes < iov->iov_len) {
 249            iov->iov_len = bytes;
 250            return;
 251        }
 252
 253        bytes -= iov->iov_len;
 254    }
 255
 256    assert(!"couldn't truncate iov");
 257}
 258
 259static void
 260vubr_backend_recv_cb(int sock, void *ctx)
 261{
 262    VubrDev *vubr = (VubrDev *) ctx;
 263    VuDev *dev = &vubr->vudev;
 264    VuVirtq *vq = vu_get_queue(dev, 0);
 265    VuVirtqElement *elem = NULL;
 266    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
 267    struct virtio_net_hdr_mrg_rxbuf mhdr;
 268    unsigned mhdr_cnt = 0;
 269    int hdrlen = vubr->hdrlen;
 270    int i = 0;
 271    struct virtio_net_hdr hdr = {
 272        .flags = 0,
 273        .gso_type = VIRTIO_NET_HDR_GSO_NONE
 274    };
 275
 276    DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
 277    DPRINT("    hdrlen = %d\n", hdrlen);
 278
 279    if (!vu_queue_enabled(dev, vq) ||
 280        !vu_queue_avail_bytes(dev, vq, hdrlen, 0)) {
 281        DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
 282        return;
 283    }
 284
 285    do {
 286        struct iovec *sg;
 287        ssize_t ret, total = 0;
 288        unsigned int num;
 289
 290        elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
 291        if (!elem) {
 292            break;
 293        }
 294
 295        if (elem->in_num < 1) {
 296            fprintf(stderr, "virtio-net contains no in buffers\n");
 297            break;
 298        }
 299
 300        sg = elem->in_sg;
 301        num = elem->in_num;
 302        if (i == 0) {
 303            if (hdrlen == 12) {
 304                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
 305                                    sg, elem->in_num,
 306                                    offsetof(typeof(mhdr), num_buffers),
 307                                    sizeof(mhdr.num_buffers));
 308            }
 309            iov_from_buf(sg, elem->in_num, 0, &hdr, sizeof hdr);
 310            total += hdrlen;
 311            ret = iov_discard_front(&sg, &num, hdrlen);
 312            assert(ret == hdrlen);
 313        }
 314
 315        struct msghdr msg = {
 316            .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
 317            .msg_namelen = sizeof(struct sockaddr_in),
 318            .msg_iov = sg,
 319            .msg_iovlen = elem->in_num,
 320            .msg_flags = MSG_DONTWAIT,
 321        };
 322        do {
 323            ret = recvmsg(vubr->backend_udp_sock, &msg, 0);
 324        } while (ret == -1 && (errno == EINTR));
 325
 326        if (i == 0) {
 327            iov_restore_front(elem->in_sg, sg, hdrlen);
 328        }
 329
 330        if (ret == -1) {
 331            if (errno == EWOULDBLOCK) {
 332                vu_queue_rewind(dev, vq, 1);
 333                break;
 334            }
 335
 336            vubr_die("recvmsg()");
 337        }
 338
 339        total += ret;
 340        iov_truncate(elem->in_sg, elem->in_num, total);
 341        vu_queue_fill(dev, vq, elem, total, i++);
 342
 343        free(elem);
 344        elem = NULL;
 345    } while (false); /* could loop if DONTWAIT worked? */
 346
 347    if (mhdr_cnt) {
 348        mhdr.num_buffers = i;
 349        iov_from_buf(mhdr_sg, mhdr_cnt,
 350                     0,
 351                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
 352    }
 353
 354    vu_queue_flush(dev, vq, i);
 355    vu_queue_notify(dev, vq);
 356
 357    free(elem);
 358}
 359
 360static void
 361vubr_receive_cb(int sock, void *ctx)
 362{
 363    VubrDev *vubr = (VubrDev *)ctx;
 364
 365    if (!vu_dispatch(&vubr->vudev)) {
 366        fprintf(stderr, "Error while dispatching\n");
 367    }
 368}
 369
 370typedef struct WatchData {
 371    VuDev *dev;
 372    vu_watch_cb cb;
 373    void *data;
 374} WatchData;
 375
 376static void
 377watch_cb(int sock, void *ctx)
 378{
 379    struct WatchData *wd = ctx;
 380
 381    wd->cb(wd->dev, VU_WATCH_IN, wd->data);
 382}
 383
 384static void
 385vubr_set_watch(VuDev *dev, int fd, int condition,
 386               vu_watch_cb cb, void *data)
 387{
 388    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 389    static WatchData watches[FD_SETSIZE];
 390    struct WatchData *wd = &watches[fd];
 391
 392    wd->cb = cb;
 393    wd->data = data;
 394    wd->dev = dev;
 395    dispatcher_add(&vubr->dispatcher, fd, wd, watch_cb);
 396}
 397
 398static void
 399vubr_remove_watch(VuDev *dev, int fd)
 400{
 401    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 402
 403    dispatcher_remove(&vubr->dispatcher, fd);
 404}
 405
 406static int
 407vubr_send_rarp_exec(VuDev *dev, VhostUserMsg *vmsg)
 408{
 409    DPRINT("Function %s() not implemented yet.\n", __func__);
 410    return 0;
 411}
 412
 413static int
 414vubr_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
 415{
 416    switch (vmsg->request) {
 417    case VHOST_USER_SEND_RARP:
 418        *do_reply = vubr_send_rarp_exec(dev, vmsg);
 419        return 1;
 420    default:
 421        /* let the library handle the rest */
 422        return 0;
 423    }
 424
 425    return 0;
 426}
 427
 428static void
 429vubr_set_features(VuDev *dev, uint64_t features)
 430{
 431    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 432
 433    if ((features & (1ULL << VIRTIO_F_VERSION_1)) ||
 434        (features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
 435        vubr->hdrlen = 12;
 436    } else {
 437        vubr->hdrlen = 10;
 438    }
 439}
 440
 441static uint64_t
 442vubr_get_features(VuDev *dev)
 443{
 444    return 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |
 445        1ULL << VIRTIO_NET_F_MRG_RXBUF;
 446}
 447
 448static void
 449vubr_queue_set_started(VuDev *dev, int qidx, bool started)
 450{
 451    VuVirtq *vq = vu_get_queue(dev, qidx);
 452
 453    if (qidx % 2 == 1) {
 454        vu_set_queue_handler(dev, vq, started ? vubr_handle_tx : NULL);
 455    }
 456}
 457
 458static void
 459vubr_panic(VuDev *dev, const char *msg)
 460{
 461    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 462
 463    fprintf(stderr, "PANIC: %s\n", msg);
 464
 465    dispatcher_remove(&vubr->dispatcher, dev->sock);
 466    vubr->quit = 1;
 467}
 468
 469static const VuDevIface vuiface = {
 470    .get_features = vubr_get_features,
 471    .set_features = vubr_set_features,
 472    .process_msg = vubr_process_msg,
 473    .queue_set_started = vubr_queue_set_started,
 474};
 475
 476static void
 477vubr_accept_cb(int sock, void *ctx)
 478{
 479    VubrDev *dev = (VubrDev *)ctx;
 480    int conn_fd;
 481    struct sockaddr_un un;
 482    socklen_t len = sizeof(un);
 483
 484    conn_fd = accept(sock, (struct sockaddr *) &un, &len);
 485    if (conn_fd == -1) {
 486        vubr_die("accept()");
 487    }
 488    DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
 489
 490    vu_init(&dev->vudev,
 491            conn_fd,
 492            vubr_panic,
 493            vubr_set_watch,
 494            vubr_remove_watch,
 495            &vuiface);
 496
 497    dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
 498    dispatcher_remove(&dev->dispatcher, sock);
 499}
 500
 501static VubrDev *
 502vubr_new(const char *path, bool client)
 503{
 504    VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
 505    struct sockaddr_un un;
 506    CallbackFunc cb;
 507    size_t len;
 508
 509    /* Get a UNIX socket. */
 510    dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
 511    if (dev->sock == -1) {
 512        vubr_die("socket");
 513    }
 514
 515    un.sun_family = AF_UNIX;
 516    strcpy(un.sun_path, path);
 517    len = sizeof(un.sun_family) + strlen(path);
 518
 519    if (!client) {
 520        unlink(path);
 521
 522        if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
 523            vubr_die("bind");
 524        }
 525
 526        if (listen(dev->sock, 1) == -1) {
 527            vubr_die("listen");
 528        }
 529        cb = vubr_accept_cb;
 530
 531        DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
 532    } else {
 533        if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
 534            vubr_die("connect");
 535        }
 536        vu_init(&dev->vudev,
 537                dev->sock,
 538                vubr_panic,
 539                vubr_set_watch,
 540                vubr_remove_watch,
 541                &vuiface);
 542        cb = vubr_receive_cb;
 543    }
 544
 545    dispatcher_init(&dev->dispatcher);
 546
 547    dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);
 548
 549    return dev;
 550}
 551
 552static void
 553vubr_set_host(struct sockaddr_in *saddr, const char *host)
 554{
 555    if (isdigit(host[0])) {
 556        if (!inet_aton(host, &saddr->sin_addr)) {
 557            fprintf(stderr, "inet_aton() failed.\n");
 558            exit(1);
 559        }
 560    } else {
 561        struct hostent *he = gethostbyname(host);
 562
 563        if (!he) {
 564            fprintf(stderr, "gethostbyname() failed.\n");
 565            exit(1);
 566        }
 567        saddr->sin_addr = *(struct in_addr *)he->h_addr;
 568    }
 569}
 570
 571static void
 572vubr_backend_udp_setup(VubrDev *dev,
 573                       const char *local_host,
 574                       const char *local_port,
 575                       const char *remote_host,
 576                       const char *remote_port)
 577{
 578    int sock;
 579    const char *r;
 580
 581    int lport, rport;
 582
 583    lport = strtol(local_port, (char **)&r, 0);
 584    if (r == local_port) {
 585        fprintf(stderr, "lport parsing failed.\n");
 586        exit(1);
 587    }
 588
 589    rport = strtol(remote_port, (char **)&r, 0);
 590    if (r == remote_port) {
 591        fprintf(stderr, "rport parsing failed.\n");
 592        exit(1);
 593    }
 594
 595    struct sockaddr_in si_local = {
 596        .sin_family = AF_INET,
 597        .sin_port = htons(lport),
 598    };
 599
 600    vubr_set_host(&si_local, local_host);
 601
 602    /* setup destination for sends */
 603    dev->backend_udp_dest = (struct sockaddr_in) {
 604        .sin_family = AF_INET,
 605        .sin_port = htons(rport),
 606    };
 607    vubr_set_host(&dev->backend_udp_dest, remote_host);
 608
 609    sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
 610    if (sock == -1) {
 611        vubr_die("socket");
 612    }
 613
 614    if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
 615        vubr_die("bind");
 616    }
 617
 618    dev->backend_udp_sock = sock;
 619    dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
 620    DPRINT("Waiting for data from udp backend on %s:%d...\n",
 621           local_host, lport);
 622}
 623
 624static void
 625vubr_run(VubrDev *dev)
 626{
 627    while (!dev->quit) {
 628        /* timeout 200ms */
 629        dispatcher_wait(&dev->dispatcher, 200000);
 630        /* Here one can try polling strategy. */
 631    }
 632}
 633
 634static int
 635vubr_parse_host_port(const char **host, const char **port, const char *buf)
 636{
 637    char *p = strchr(buf, ':');
 638
 639    if (!p) {
 640        return -1;
 641    }
 642    *p = '\0';
 643    *host = strdup(buf);
 644    *port = strdup(p + 1);
 645    return 0;
 646}
 647
 648#define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
 649#define DEFAULT_LHOST "127.0.0.1"
 650#define DEFAULT_LPORT "4444"
 651#define DEFAULT_RHOST "127.0.0.1"
 652#define DEFAULT_RPORT "5555"
 653
 654static const char *ud_socket_path = DEFAULT_UD_SOCKET;
 655static const char *lhost = DEFAULT_LHOST;
 656static const char *lport = DEFAULT_LPORT;
 657static const char *rhost = DEFAULT_RHOST;
 658static const char *rport = DEFAULT_RPORT;
 659
 660int
 661main(int argc, char *argv[])
 662{
 663    VubrDev *dev;
 664    int opt;
 665    bool client = false;
 666
 667    while ((opt = getopt(argc, argv, "l:r:u:c")) != -1) {
 668
 669        switch (opt) {
 670        case 'l':
 671            if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) {
 672                goto out;
 673            }
 674            break;
 675        case 'r':
 676            if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) {
 677                goto out;
 678            }
 679            break;
 680        case 'u':
 681            ud_socket_path = strdup(optarg);
 682            break;
 683        case 'c':
 684            client = true;
 685            break;
 686        default:
 687            goto out;
 688        }
 689    }
 690
 691    DPRINT("ud socket: %s (%s)\n", ud_socket_path,
 692           client ? "client" : "server");
 693    DPRINT("local:     %s:%s\n", lhost, lport);
 694    DPRINT("remote:    %s:%s\n", rhost, rport);
 695
 696    dev = vubr_new(ud_socket_path, client);
 697    if (!dev) {
 698        return 1;
 699    }
 700
 701    vubr_backend_udp_setup(dev, lhost, lport, rhost, rport);
 702    vubr_run(dev);
 703
 704    vu_deinit(&dev->vudev);
 705
 706    return 0;
 707
 708out:
 709    fprintf(stderr, "Usage: %s ", argv[0]);
 710    fprintf(stderr, "[-c] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n");
 711    fprintf(stderr, "\t-u path to unix doman socket. default: %s\n",
 712            DEFAULT_UD_SOCKET);
 713    fprintf(stderr, "\t-l local host and port. default: %s:%s\n",
 714            DEFAULT_LHOST, DEFAULT_LPORT);
 715    fprintf(stderr, "\t-r remote host and port. default: %s:%s\n",
 716            DEFAULT_RHOST, DEFAULT_RPORT);
 717    fprintf(stderr, "\t-c client mode\n");
 718
 719    return 1;
 720}
 721