qemu/net/tap.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 * Copyright (c) 2009 Red Hat, Inc.
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "tap_int.h"
  28
  29
  30#include <sys/ioctl.h>
  31#include <sys/wait.h>
  32#include <sys/socket.h>
  33#include <net/if.h>
  34
  35#include "net/net.h"
  36#include "clients.h"
  37#include "monitor/monitor.h"
  38#include "sysemu/sysemu.h"
  39#include "qapi/error.h"
  40#include "qemu-common.h"
  41#include "qemu/cutils.h"
  42#include "qemu/error-report.h"
  43
  44#include "net/tap.h"
  45
  46#include "net/vhost_net.h"
  47
  48typedef struct TAPState {
  49    NetClientState nc;
  50    int fd;
  51    char down_script[1024];
  52    char down_script_arg[128];
  53    uint8_t buf[NET_BUFSIZE];
  54    bool read_poll;
  55    bool write_poll;
  56    bool using_vnet_hdr;
  57    bool has_ufo;
  58    bool enabled;
  59    VHostNetState *vhost_net;
  60    unsigned host_vnet_hdr_len;
  61} TAPState;
  62
  63static void launch_script(const char *setup_script, const char *ifname,
  64                          int fd, Error **errp);
  65
  66static void tap_send(void *opaque);
  67static void tap_writable(void *opaque);
  68
  69static void tap_update_fd_handler(TAPState *s)
  70{
  71    qemu_set_fd_handler(s->fd,
  72                        s->read_poll && s->enabled ? tap_send : NULL,
  73                        s->write_poll && s->enabled ? tap_writable : NULL,
  74                        s);
  75}
  76
  77static void tap_read_poll(TAPState *s, bool enable)
  78{
  79    s->read_poll = enable;
  80    tap_update_fd_handler(s);
  81}
  82
  83static void tap_write_poll(TAPState *s, bool enable)
  84{
  85    s->write_poll = enable;
  86    tap_update_fd_handler(s);
  87}
  88
  89static void tap_writable(void *opaque)
  90{
  91    TAPState *s = opaque;
  92
  93    tap_write_poll(s, false);
  94
  95    qemu_flush_queued_packets(&s->nc);
  96}
  97
  98static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
  99{
 100    ssize_t len;
 101
 102    do {
 103        len = writev(s->fd, iov, iovcnt);
 104    } while (len == -1 && errno == EINTR);
 105
 106    if (len == -1 && errno == EAGAIN) {
 107        tap_write_poll(s, true);
 108        return 0;
 109    }
 110
 111    return len;
 112}
 113
 114static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
 115                               int iovcnt)
 116{
 117    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 118    const struct iovec *iovp = iov;
 119    struct iovec iov_copy[iovcnt + 1];
 120    struct virtio_net_hdr_mrg_rxbuf hdr = { };
 121
 122    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 123        iov_copy[0].iov_base = &hdr;
 124        iov_copy[0].iov_len =  s->host_vnet_hdr_len;
 125        memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
 126        iovp = iov_copy;
 127        iovcnt++;
 128    }
 129
 130    return tap_write_packet(s, iovp, iovcnt);
 131}
 132
 133static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size)
 134{
 135    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 136    struct iovec iov[2];
 137    int iovcnt = 0;
 138    struct virtio_net_hdr_mrg_rxbuf hdr = { };
 139
 140    if (s->host_vnet_hdr_len) {
 141        iov[iovcnt].iov_base = &hdr;
 142        iov[iovcnt].iov_len  = s->host_vnet_hdr_len;
 143        iovcnt++;
 144    }
 145
 146    iov[iovcnt].iov_base = (char *)buf;
 147    iov[iovcnt].iov_len  = size;
 148    iovcnt++;
 149
 150    return tap_write_packet(s, iov, iovcnt);
 151}
 152
 153static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 154{
 155    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 156    struct iovec iov[1];
 157
 158    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 159        return tap_receive_raw(nc, buf, size);
 160    }
 161
 162    iov[0].iov_base = (char *)buf;
 163    iov[0].iov_len  = size;
 164
 165    return tap_write_packet(s, iov, 1);
 166}
 167
 168#ifndef __sun__
 169ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
 170{
 171    return read(tapfd, buf, maxlen);
 172}
 173#endif
 174
 175static void tap_send_completed(NetClientState *nc, ssize_t len)
 176{
 177    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 178    tap_read_poll(s, true);
 179}
 180
 181static void tap_send(void *opaque)
 182{
 183    TAPState *s = opaque;
 184    int size;
 185    int packets = 0;
 186
 187    while (true) {
 188        uint8_t *buf = s->buf;
 189
 190        size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
 191        if (size <= 0) {
 192            break;
 193        }
 194
 195        if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 196            buf  += s->host_vnet_hdr_len;
 197            size -= s->host_vnet_hdr_len;
 198        }
 199
 200        size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
 201        if (size == 0) {
 202            tap_read_poll(s, false);
 203            break;
 204        } else if (size < 0) {
 205            break;
 206        }
 207
 208        /*
 209         * When the host keeps receiving more packets while tap_send() is
 210         * running we can hog the QEMU global mutex.  Limit the number of
 211         * packets that are processed per tap_send() callback to prevent
 212         * stalling the guest.
 213         */
 214        packets++;
 215        if (packets >= 50) {
 216            break;
 217        }
 218    }
 219}
 220
 221static bool tap_has_ufo(NetClientState *nc)
 222{
 223    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 224
 225    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
 226
 227    return s->has_ufo;
 228}
 229
 230static bool tap_has_vnet_hdr(NetClientState *nc)
 231{
 232    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 233
 234    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
 235
 236    return !!s->host_vnet_hdr_len;
 237}
 238
 239static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
 240{
 241    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 242
 243    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
 244
 245    return !!tap_probe_vnet_hdr_len(s->fd, len);
 246}
 247
 248static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
 249{
 250    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 251
 252    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
 253    assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
 254           len == sizeof(struct virtio_net_hdr));
 255
 256    tap_fd_set_vnet_hdr_len(s->fd, len);
 257    s->host_vnet_hdr_len = len;
 258}
 259
 260static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
 261{
 262    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 263
 264    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
 265    assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
 266
 267    s->using_vnet_hdr = using_vnet_hdr;
 268}
 269
 270static int tap_set_vnet_le(NetClientState *nc, bool is_le)
 271{
 272    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 273
 274    return tap_fd_set_vnet_le(s->fd, is_le);
 275}
 276
 277static int tap_set_vnet_be(NetClientState *nc, bool is_be)
 278{
 279    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 280
 281    return tap_fd_set_vnet_be(s->fd, is_be);
 282}
 283
 284static void tap_set_offload(NetClientState *nc, int csum, int tso4,
 285                     int tso6, int ecn, int ufo)
 286{
 287    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 288    if (s->fd < 0) {
 289        return;
 290    }
 291
 292    tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
 293}
 294
 295static void tap_cleanup(NetClientState *nc)
 296{
 297    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 298    Error *err = NULL;
 299
 300    if (s->vhost_net) {
 301        vhost_net_cleanup(s->vhost_net);
 302        s->vhost_net = NULL;
 303    }
 304
 305    qemu_purge_queued_packets(nc);
 306
 307    if (s->down_script[0]) {
 308        launch_script(s->down_script, s->down_script_arg, s->fd, &err);
 309        if (err) {
 310            error_report_err(err);
 311        }
 312    }
 313
 314    tap_read_poll(s, false);
 315    tap_write_poll(s, false);
 316    close(s->fd);
 317    s->fd = -1;
 318}
 319
 320static void tap_poll(NetClientState *nc, bool enable)
 321{
 322    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 323    tap_read_poll(s, enable);
 324    tap_write_poll(s, enable);
 325}
 326
 327int tap_get_fd(NetClientState *nc)
 328{
 329    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 330    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
 331    return s->fd;
 332}
 333
 334/* fd support */
 335
 336static NetClientInfo net_tap_info = {
 337    .type = NET_CLIENT_OPTIONS_KIND_TAP,
 338    .size = sizeof(TAPState),
 339    .receive = tap_receive,
 340    .receive_raw = tap_receive_raw,
 341    .receive_iov = tap_receive_iov,
 342    .poll = tap_poll,
 343    .cleanup = tap_cleanup,
 344    .has_ufo = tap_has_ufo,
 345    .has_vnet_hdr = tap_has_vnet_hdr,
 346    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
 347    .using_vnet_hdr = tap_using_vnet_hdr,
 348    .set_offload = tap_set_offload,
 349    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
 350    .set_vnet_le = tap_set_vnet_le,
 351    .set_vnet_be = tap_set_vnet_be,
 352};
 353
 354static TAPState *net_tap_fd_init(NetClientState *peer,
 355                                 const char *model,
 356                                 const char *name,
 357                                 int fd,
 358                                 int vnet_hdr)
 359{
 360    NetClientState *nc;
 361    TAPState *s;
 362
 363    nc = qemu_new_net_client(&net_tap_info, peer, model, name);
 364
 365    s = DO_UPCAST(TAPState, nc, nc);
 366
 367    s->fd = fd;
 368    s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
 369    s->using_vnet_hdr = false;
 370    s->has_ufo = tap_probe_has_ufo(s->fd);
 371    s->enabled = true;
 372    tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
 373    /*
 374     * Make sure host header length is set correctly in tap:
 375     * it might have been modified by another instance of qemu.
 376     */
 377    if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) {
 378        tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len);
 379    }
 380    tap_read_poll(s, true);
 381    s->vhost_net = NULL;
 382    return s;
 383}
 384
 385static void launch_script(const char *setup_script, const char *ifname,
 386                          int fd, Error **errp)
 387{
 388    int pid, status;
 389    char *args[3];
 390    char **parg;
 391
 392    /* try to launch network script */
 393    pid = fork();
 394    if (pid < 0) {
 395        error_setg_errno(errp, errno, "could not launch network script %s",
 396                         setup_script);
 397        return;
 398    }
 399    if (pid == 0) {
 400        int open_max = sysconf(_SC_OPEN_MAX), i;
 401
 402        for (i = 3; i < open_max; i++) {
 403            if (i != fd) {
 404                close(i);
 405            }
 406        }
 407        parg = args;
 408        *parg++ = (char *)setup_script;
 409        *parg++ = (char *)ifname;
 410        *parg = NULL;
 411        execv(setup_script, args);
 412        _exit(1);
 413    } else {
 414        while (waitpid(pid, &status, 0) != pid) {
 415            /* loop */
 416        }
 417
 418        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
 419            return;
 420        }
 421        error_setg(errp, "network script %s failed with status %d",
 422                   setup_script, status);
 423    }
 424}
 425
 426static int recv_fd(int c)
 427{
 428    int fd;
 429    uint8_t msgbuf[CMSG_SPACE(sizeof(fd))];
 430    struct msghdr msg = {
 431        .msg_control = msgbuf,
 432        .msg_controllen = sizeof(msgbuf),
 433    };
 434    struct cmsghdr *cmsg;
 435    struct iovec iov;
 436    uint8_t req[1];
 437    ssize_t len;
 438
 439    cmsg = CMSG_FIRSTHDR(&msg);
 440    cmsg->cmsg_level = SOL_SOCKET;
 441    cmsg->cmsg_type = SCM_RIGHTS;
 442    cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
 443    msg.msg_controllen = cmsg->cmsg_len;
 444
 445    iov.iov_base = req;
 446    iov.iov_len = sizeof(req);
 447
 448    msg.msg_iov = &iov;
 449    msg.msg_iovlen = 1;
 450
 451    len = recvmsg(c, &msg, 0);
 452    if (len > 0) {
 453        memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
 454        return fd;
 455    }
 456
 457    return len;
 458}
 459
 460static int net_bridge_run_helper(const char *helper, const char *bridge,
 461                                 Error **errp)
 462{
 463    sigset_t oldmask, mask;
 464    int pid, status;
 465    char *args[5];
 466    char **parg;
 467    int sv[2];
 468
 469    sigemptyset(&mask);
 470    sigaddset(&mask, SIGCHLD);
 471    sigprocmask(SIG_BLOCK, &mask, &oldmask);
 472
 473    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
 474        error_setg_errno(errp, errno, "socketpair() failed");
 475        return -1;
 476    }
 477
 478    /* try to launch bridge helper */
 479    pid = fork();
 480    if (pid < 0) {
 481        error_setg_errno(errp, errno, "Can't fork bridge helper");
 482        return -1;
 483    }
 484    if (pid == 0) {
 485        int open_max = sysconf(_SC_OPEN_MAX), i;
 486        char fd_buf[6+10];
 487        char br_buf[6+IFNAMSIZ] = {0};
 488        char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15];
 489
 490        for (i = 3; i < open_max; i++) {
 491            if (i != sv[1]) {
 492                close(i);
 493            }
 494        }
 495
 496        snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]);
 497
 498        if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
 499            /* assume helper is a command */
 500
 501            if (strstr(helper, "--br=") == NULL) {
 502                snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
 503            }
 504
 505            snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s",
 506                     helper, "--use-vnet", fd_buf, br_buf);
 507
 508            parg = args;
 509            *parg++ = (char *)"sh";
 510            *parg++ = (char *)"-c";
 511            *parg++ = helper_cmd;
 512            *parg++ = NULL;
 513
 514            execv("/bin/sh", args);
 515        } else {
 516            /* assume helper is just the executable path name */
 517
 518            snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
 519
 520            parg = args;
 521            *parg++ = (char *)helper;
 522            *parg++ = (char *)"--use-vnet";
 523            *parg++ = fd_buf;
 524            *parg++ = br_buf;
 525            *parg++ = NULL;
 526
 527            execv(helper, args);
 528        }
 529        _exit(1);
 530
 531    } else {
 532        int fd;
 533        int saved_errno;
 534
 535        close(sv[1]);
 536
 537        do {
 538            fd = recv_fd(sv[0]);
 539        } while (fd == -1 && errno == EINTR);
 540        saved_errno = errno;
 541
 542        close(sv[0]);
 543
 544        while (waitpid(pid, &status, 0) != pid) {
 545            /* loop */
 546        }
 547        sigprocmask(SIG_SETMASK, &oldmask, NULL);
 548        if (fd < 0) {
 549            error_setg_errno(errp, saved_errno,
 550                             "failed to recv file descriptor");
 551            return -1;
 552        }
 553        if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
 554            error_setg(errp, "bridge helper failed");
 555            return -1;
 556        }
 557        return fd;
 558    }
 559}
 560
 561int net_init_bridge(const NetClientOptions *opts, const char *name,
 562                    NetClientState *peer, Error **errp)
 563{
 564    const NetdevBridgeOptions *bridge;
 565    const char *helper, *br;
 566    TAPState *s;
 567    int fd, vnet_hdr;
 568
 569    assert(opts->type == NET_CLIENT_OPTIONS_KIND_BRIDGE);
 570    bridge = opts->u.bridge.data;
 571
 572    helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER;
 573    br     = bridge->has_br     ? bridge->br     : DEFAULT_BRIDGE_INTERFACE;
 574
 575    fd = net_bridge_run_helper(helper, br, errp);
 576    if (fd == -1) {
 577        return -1;
 578    }
 579
 580    fcntl(fd, F_SETFL, O_NONBLOCK);
 581    vnet_hdr = tap_probe_vnet_hdr(fd);
 582    s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
 583
 584    snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
 585             br);
 586
 587    return 0;
 588}
 589
 590static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
 591                        const char *setup_script, char *ifname,
 592                        size_t ifname_sz, int mq_required, Error **errp)
 593{
 594    Error *err = NULL;
 595    int fd, vnet_hdr_required;
 596
 597    if (tap->has_vnet_hdr) {
 598        *vnet_hdr = tap->vnet_hdr;
 599        vnet_hdr_required = *vnet_hdr;
 600    } else {
 601        *vnet_hdr = 1;
 602        vnet_hdr_required = 0;
 603    }
 604
 605    TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required,
 606                      mq_required, errp));
 607    if (fd < 0) {
 608        return -1;
 609    }
 610
 611    if (setup_script &&
 612        setup_script[0] != '\0' &&
 613        strcmp(setup_script, "no") != 0) {
 614        launch_script(setup_script, ifname, fd, &err);
 615        if (err) {
 616            error_propagate(errp, err);
 617            close(fd);
 618            return -1;
 619        }
 620    }
 621
 622    return fd;
 623}
 624
 625#define MAX_TAP_QUEUES 1024
 626
 627static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
 628                             const char *model, const char *name,
 629                             const char *ifname, const char *script,
 630                             const char *downscript, const char *vhostfdname,
 631                             int vnet_hdr, int fd, Error **errp)
 632{
 633    Error *err = NULL;
 634    TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
 635    int vhostfd;
 636
 637    tap_set_sndbuf(s->fd, tap, &err);
 638    if (err) {
 639        error_propagate(errp, err);
 640        return;
 641    }
 642
 643    if (tap->has_fd || tap->has_fds) {
 644        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
 645    } else if (tap->has_helper) {
 646        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s",
 647                 tap->helper);
 648    } else {
 649        snprintf(s->nc.info_str, sizeof(s->nc.info_str),
 650                 "ifname=%s,script=%s,downscript=%s", ifname, script,
 651                 downscript);
 652
 653        if (strcmp(downscript, "no") != 0) {
 654            snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
 655            snprintf(s->down_script_arg, sizeof(s->down_script_arg),
 656                     "%s", ifname);
 657        }
 658    }
 659
 660    if (tap->has_vhost ? tap->vhost :
 661        vhostfdname || (tap->has_vhostforce && tap->vhostforce)) {
 662        VhostNetOptions options;
 663
 664        options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
 665        options.net_backend = &s->nc;
 666
 667        if (vhostfdname) {
 668            vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err);
 669            if (vhostfd == -1) {
 670                error_propagate(errp, err);
 671                return;
 672            }
 673        } else {
 674            vhostfd = open("/dev/vhost-net", O_RDWR);
 675            if (vhostfd < 0) {
 676                error_setg_errno(errp, errno,
 677                                 "tap: open vhost char device failed");
 678                return;
 679            }
 680        }
 681        options.opaque = (void *)(uintptr_t)vhostfd;
 682
 683        s->vhost_net = vhost_net_init(&options);
 684        if (!s->vhost_net) {
 685            error_setg(errp,
 686                       "vhost-net requested but could not be initialized");
 687            return;
 688        }
 689    } else if (vhostfdname) {
 690        error_setg(errp, "vhostfd= is not valid without vhost");
 691    }
 692}
 693
 694static int get_fds(char *str, char *fds[], int max)
 695{
 696    char *ptr = str, *this;
 697    size_t len = strlen(str);
 698    int i = 0;
 699
 700    while (i < max && ptr < str + len) {
 701        this = strchr(ptr, ':');
 702
 703        if (this == NULL) {
 704            fds[i] = g_strdup(ptr);
 705        } else {
 706            fds[i] = g_strndup(ptr, this - ptr);
 707        }
 708
 709        i++;
 710        if (this == NULL) {
 711            break;
 712        } else {
 713            ptr = this + 1;
 714        }
 715    }
 716
 717    return i;
 718}
 719
 720int net_init_tap(const NetClientOptions *opts, const char *name,
 721                 NetClientState *peer, Error **errp)
 722{
 723    const NetdevTapOptions *tap;
 724    int fd, vnet_hdr = 0, i = 0, queues;
 725    /* for the no-fd, no-helper case */
 726    const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */
 727    const char *downscript = NULL;
 728    Error *err = NULL;
 729    const char *vhostfdname;
 730    char ifname[128];
 731
 732    assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP);
 733    tap = opts->u.tap.data;
 734    queues = tap->has_queues ? tap->queues : 1;
 735    vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL;
 736
 737    /* QEMU vlans does not support multiqueue tap, in this case peer is set.
 738     * For -netdev, peer is always NULL. */
 739    if (peer && (tap->has_queues || tap->has_fds || tap->has_vhostfds)) {
 740        error_setg(errp, "Multiqueue tap cannot be used with QEMU vlans");
 741        return -1;
 742    }
 743
 744    if (tap->has_fd) {
 745        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
 746            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
 747            tap->has_fds || tap->has_vhostfds) {
 748            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
 749                       "helper=, queues=, fds=, and vhostfds= "
 750                       "are invalid with fd=");
 751            return -1;
 752        }
 753
 754        fd = monitor_fd_param(cur_mon, tap->fd, &err);
 755        if (fd == -1) {
 756            error_propagate(errp, err);
 757            return -1;
 758        }
 759
 760        fcntl(fd, F_SETFL, O_NONBLOCK);
 761
 762        vnet_hdr = tap_probe_vnet_hdr(fd);
 763
 764        net_init_tap_one(tap, peer, "tap", name, NULL,
 765                         script, downscript,
 766                         vhostfdname, vnet_hdr, fd, &err);
 767        if (err) {
 768            error_propagate(errp, err);
 769            return -1;
 770        }
 771    } else if (tap->has_fds) {
 772        char *fds[MAX_TAP_QUEUES];
 773        char *vhost_fds[MAX_TAP_QUEUES];
 774        int nfds, nvhosts;
 775
 776        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
 777            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
 778            tap->has_vhostfd) {
 779            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
 780                       "helper=, queues=, and vhostfd= "
 781                       "are invalid with fds=");
 782            return -1;
 783        }
 784
 785        nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES);
 786        if (tap->has_vhostfds) {
 787            nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES);
 788            if (nfds != nvhosts) {
 789                error_setg(errp, "The number of fds passed does not match "
 790                           "the number of vhostfds passed");
 791                return -1;
 792            }
 793        }
 794
 795        for (i = 0; i < nfds; i++) {
 796            fd = monitor_fd_param(cur_mon, fds[i], &err);
 797            if (fd == -1) {
 798                error_propagate(errp, err);
 799                return -1;
 800            }
 801
 802            fcntl(fd, F_SETFL, O_NONBLOCK);
 803
 804            if (i == 0) {
 805                vnet_hdr = tap_probe_vnet_hdr(fd);
 806            } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) {
 807                error_setg(errp,
 808                           "vnet_hdr not consistent across given tap fds");
 809                return -1;
 810            }
 811
 812            net_init_tap_one(tap, peer, "tap", name, ifname,
 813                             script, downscript,
 814                             tap->has_vhostfds ? vhost_fds[i] : NULL,
 815                             vnet_hdr, fd, &err);
 816            if (err) {
 817                error_propagate(errp, err);
 818                return -1;
 819            }
 820        }
 821    } else if (tap->has_helper) {
 822        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
 823            tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) {
 824            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
 825                       "queues=, and vhostfds= are invalid with helper=");
 826            return -1;
 827        }
 828
 829        fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE,
 830                                   errp);
 831        if (fd == -1) {
 832            return -1;
 833        }
 834
 835        fcntl(fd, F_SETFL, O_NONBLOCK);
 836        vnet_hdr = tap_probe_vnet_hdr(fd);
 837
 838        net_init_tap_one(tap, peer, "bridge", name, ifname,
 839                         script, downscript, vhostfdname,
 840                         vnet_hdr, fd, &err);
 841        if (err) {
 842            error_propagate(errp, err);
 843            close(fd);
 844            return -1;
 845        }
 846    } else {
 847        if (tap->has_vhostfds) {
 848            error_setg(errp, "vhostfds= is invalid if fds= wasn't specified");
 849            return -1;
 850        }
 851        script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT;
 852        downscript = tap->has_downscript ? tap->downscript :
 853            DEFAULT_NETWORK_DOWN_SCRIPT;
 854
 855        if (tap->has_ifname) {
 856            pstrcpy(ifname, sizeof ifname, tap->ifname);
 857        } else {
 858            ifname[0] = '\0';
 859        }
 860
 861        for (i = 0; i < queues; i++) {
 862            fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
 863                              ifname, sizeof ifname, queues > 1, errp);
 864            if (fd == -1) {
 865                return -1;
 866            }
 867
 868            if (queues > 1 && i == 0 && !tap->has_ifname) {
 869                if (tap_fd_get_ifname(fd, ifname)) {
 870                    error_setg(errp, "Fail to get ifname");
 871                    close(fd);
 872                    return -1;
 873                }
 874            }
 875
 876            net_init_tap_one(tap, peer, "tap", name, ifname,
 877                             i >= 1 ? "no" : script,
 878                             i >= 1 ? "no" : downscript,
 879                             vhostfdname, vnet_hdr, fd, &err);
 880            if (err) {
 881                error_propagate(errp, err);
 882                close(fd);
 883                return -1;
 884            }
 885        }
 886    }
 887
 888    return 0;
 889}
 890
 891VHostNetState *tap_get_vhost_net(NetClientState *nc)
 892{
 893    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 894    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
 895    return s->vhost_net;
 896}
 897
 898int tap_enable(NetClientState *nc)
 899{
 900    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 901    int ret;
 902
 903    if (s->enabled) {
 904        return 0;
 905    } else {
 906        ret = tap_fd_enable(s->fd);
 907        if (ret == 0) {
 908            s->enabled = true;
 909            tap_update_fd_handler(s);
 910        }
 911        return ret;
 912    }
 913}
 914
 915int tap_disable(NetClientState *nc)
 916{
 917    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 918    int ret;
 919
 920    if (s->enabled == 0) {
 921        return 0;
 922    } else {
 923        ret = tap_fd_disable(s->fd);
 924        if (ret == 0) {
 925            qemu_purge_queued_packets(nc);
 926            s->enabled = false;
 927            tap_update_fd_handler(s);
 928        }
 929        return ret;
 930    }
 931}
 932