qemu/net/tap.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 * Copyright (c) 2009 Red Hat, Inc.
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "tap_int.h"
  28
  29
  30#include <sys/ioctl.h>
  31#include <sys/wait.h>
  32#include <sys/socket.h>
  33#include <net/if.h>
  34
  35#include "net/eth.h"
  36#include "net/net.h"
  37#include "clients.h"
  38#include "monitor/monitor.h"
  39#include "sysemu/sysemu.h"
  40#include "qapi/error.h"
  41#include "qemu-common.h"
  42#include "qemu/cutils.h"
  43#include "qemu/error-report.h"
  44#include "qemu/main-loop.h"
  45#include "qemu/sockets.h"
  46
  47#include "net/tap.h"
  48
  49#include "net/vhost_net.h"
  50
/*
 * State of one TAP backend instance.  The NetClientState is the first
 * member, and the rest of this file recovers the TAPState from it with
 * DO_UPCAST().
 */
typedef struct TAPState {
    NetClientState nc;
    int fd;                     /* tap file descriptor; tap_cleanup() sets it to -1 */
    char down_script[1024];     /* script run by tap_exit_notify(); empty string means none */
    char down_script_arg[128];  /* argument handed to down_script (the interface name) */
    uint8_t buf[NET_BUFSIZE];   /* buffer tap_send() reads incoming packets into */
    bool read_poll;             /* install tap_send as the fd read handler */
    bool write_poll;            /* install tap_writable as the fd write handler */
    bool using_vnet_hdr;        /* set through tap_using_vnet_hdr() */
    bool has_ufo;               /* result of tap_probe_has_ufo() taken at init time */
    bool enabled;               /* fd handlers are only installed while this is true */
    VHostNetState *vhost_net;   /* vhost-net state, or NULL when vhost is not in use */
    unsigned host_vnet_hdr_len; /* vnet header length configured on the fd; 0 if none */
    Notifier exit;              /* exit notifier whose callback is tap_exit_notify() */
} TAPState;
  66
/* Defined further down; tap_exit_notify() calls it before its definition. */
static void launch_script(const char *setup_script, const char *ifname,
                          int fd, Error **errp);

/* fd callbacks that tap_update_fd_handler() registers. */
static void tap_send(void *opaque);
static void tap_writable(void *opaque);
  72
  73static void tap_update_fd_handler(TAPState *s)
  74{
  75    qemu_set_fd_handler(s->fd,
  76                        s->read_poll && s->enabled ? tap_send : NULL,
  77                        s->write_poll && s->enabled ? tap_writable : NULL,
  78                        s);
  79}
  80
  81static void tap_read_poll(TAPState *s, bool enable)
  82{
  83    s->read_poll = enable;
  84    tap_update_fd_handler(s);
  85}
  86
  87static void tap_write_poll(TAPState *s, bool enable)
  88{
  89    s->write_poll = enable;
  90    tap_update_fd_handler(s);
  91}
  92
  93static void tap_writable(void *opaque)
  94{
  95    TAPState *s = opaque;
  96
  97    tap_write_poll(s, false);
  98
  99    qemu_flush_queued_packets(&s->nc);
 100}
 101
 102static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
 103{
 104    ssize_t len;
 105
 106    do {
 107        len = writev(s->fd, iov, iovcnt);
 108    } while (len == -1 && errno == EINTR);
 109
 110    if (len == -1 && errno == EAGAIN) {
 111        tap_write_poll(s, true);
 112        return 0;
 113    }
 114
 115    return len;
 116}
 117
 118static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
 119                               int iovcnt)
 120{
 121    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 122    const struct iovec *iovp = iov;
 123    struct iovec iov_copy[iovcnt + 1];
 124    struct virtio_net_hdr_mrg_rxbuf hdr = { };
 125
 126    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 127        iov_copy[0].iov_base = &hdr;
 128        iov_copy[0].iov_len =  s->host_vnet_hdr_len;
 129        memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
 130        iovp = iov_copy;
 131        iovcnt++;
 132    }
 133
 134    return tap_write_packet(s, iovp, iovcnt);
 135}
 136
 137static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size)
 138{
 139    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 140    struct iovec iov[2];
 141    int iovcnt = 0;
 142    struct virtio_net_hdr_mrg_rxbuf hdr = { };
 143
 144    if (s->host_vnet_hdr_len) {
 145        iov[iovcnt].iov_base = &hdr;
 146        iov[iovcnt].iov_len  = s->host_vnet_hdr_len;
 147        iovcnt++;
 148    }
 149
 150    iov[iovcnt].iov_base = (char *)buf;
 151    iov[iovcnt].iov_len  = size;
 152    iovcnt++;
 153
 154    return tap_write_packet(s, iov, iovcnt);
 155}
 156
 157static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 158{
 159    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 160    struct iovec iov[1];
 161
 162    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 163        return tap_receive_raw(nc, buf, size);
 164    }
 165
 166    iov[0].iov_base = (char *)buf;
 167    iov[0].iov_len  = size;
 168
 169    return tap_write_packet(s, iov, 1);
 170}
 171
 172#ifndef __sun__
 173ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
 174{
 175    return read(tapfd, buf, maxlen);
 176}
 177#endif
 178
 179static void tap_send_completed(NetClientState *nc, ssize_t len)
 180{
 181    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 182    tap_read_poll(s, true);
 183}
 184
 185static void tap_send(void *opaque)
 186{
 187    TAPState *s = opaque;
 188    int size;
 189    int packets = 0;
 190
 191    while (true) {
 192        uint8_t *buf = s->buf;
 193        uint8_t min_pkt[ETH_ZLEN];
 194        size_t min_pktsz = sizeof(min_pkt);
 195
 196        size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
 197        if (size <= 0) {
 198            break;
 199        }
 200
 201        if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 202            buf  += s->host_vnet_hdr_len;
 203            size -= s->host_vnet_hdr_len;
 204        }
 205
 206        if (net_peer_needs_padding(&s->nc)) {
 207            if (eth_pad_short_frame(min_pkt, &min_pktsz, buf, size)) {
 208                buf = min_pkt;
 209                size = min_pktsz;
 210            }
 211        }
 212
 213        size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
 214        if (size == 0) {
 215            tap_read_poll(s, false);
 216            break;
 217        } else if (size < 0) {
 218            break;
 219        }
 220
 221        /*
 222         * When the host keeps receiving more packets while tap_send() is
 223         * running we can hog the QEMU global mutex.  Limit the number of
 224         * packets that are processed per tap_send() callback to prevent
 225         * stalling the guest.
 226         */
 227        packets++;
 228        if (packets >= 50) {
 229            break;
 230        }
 231    }
 232}
 233
 234static bool tap_has_ufo(NetClientState *nc)
 235{
 236    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 237
 238    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 239
 240    return s->has_ufo;
 241}
 242
 243static bool tap_has_vnet_hdr(NetClientState *nc)
 244{
 245    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 246
 247    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 248
 249    return !!s->host_vnet_hdr_len;
 250}
 251
 252static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
 253{
 254    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 255
 256    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 257
 258    return !!tap_probe_vnet_hdr_len(s->fd, len);
 259}
 260
 261static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
 262{
 263    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 264
 265    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 266    assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
 267           len == sizeof(struct virtio_net_hdr) ||
 268           len == sizeof(struct virtio_net_hdr_v1_hash));
 269
 270    tap_fd_set_vnet_hdr_len(s->fd, len);
 271    s->host_vnet_hdr_len = len;
 272}
 273
 274static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
 275{
 276    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 277
 278    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 279    assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
 280
 281    s->using_vnet_hdr = using_vnet_hdr;
 282}
 283
 284static int tap_set_vnet_le(NetClientState *nc, bool is_le)
 285{
 286    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 287
 288    return tap_fd_set_vnet_le(s->fd, is_le);
 289}
 290
 291static int tap_set_vnet_be(NetClientState *nc, bool is_be)
 292{
 293    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 294
 295    return tap_fd_set_vnet_be(s->fd, is_be);
 296}
 297
 298static void tap_set_offload(NetClientState *nc, int csum, int tso4,
 299                     int tso6, int ecn, int ufo)
 300{
 301    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 302    if (s->fd < 0) {
 303        return;
 304    }
 305
 306    tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
 307}
 308
 309static void tap_exit_notify(Notifier *notifier, void *data)
 310{
 311    TAPState *s = container_of(notifier, TAPState, exit);
 312    Error *err = NULL;
 313
 314    if (s->down_script[0]) {
 315        launch_script(s->down_script, s->down_script_arg, s->fd, &err);
 316        if (err) {
 317            error_report_err(err);
 318        }
 319    }
 320}
 321
 322static void tap_cleanup(NetClientState *nc)
 323{
 324    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 325
 326    if (s->vhost_net) {
 327        vhost_net_cleanup(s->vhost_net);
 328        g_free(s->vhost_net);
 329        s->vhost_net = NULL;
 330    }
 331
 332    qemu_purge_queued_packets(nc);
 333
 334    tap_exit_notify(&s->exit, NULL);
 335    qemu_remove_exit_notifier(&s->exit);
 336
 337    tap_read_poll(s, false);
 338    tap_write_poll(s, false);
 339    close(s->fd);
 340    s->fd = -1;
 341}
 342
 343static void tap_poll(NetClientState *nc, bool enable)
 344{
 345    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 346    tap_read_poll(s, enable);
 347    tap_write_poll(s, enable);
 348}
 349
 350static bool tap_set_steering_ebpf(NetClientState *nc, int prog_fd)
 351{
 352    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 353    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 354
 355    return tap_fd_set_steering_ebpf(s->fd, prog_fd) == 0;
 356}
 357
 358int tap_get_fd(NetClientState *nc)
 359{
 360    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 361    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 362    return s->fd;
 363}
 364
 365/* fd support */
 366
/*
 * Callback table for TAP clients.  net_tap_fd_init() passes it to
 * qemu_new_net_client(); .size is sizeof(TAPState), the structure the
 * callbacks recover from the NetClientState with DO_UPCAST().
 */
static NetClientInfo net_tap_info = {
    .type = NET_CLIENT_DRIVER_TAP,
    .size = sizeof(TAPState),
    .receive = tap_receive,
    .receive_raw = tap_receive_raw,
    .receive_iov = tap_receive_iov,
    .poll = tap_poll,
    .cleanup = tap_cleanup,
    .has_ufo = tap_has_ufo,
    .has_vnet_hdr = tap_has_vnet_hdr,
    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
    .using_vnet_hdr = tap_using_vnet_hdr,
    .set_offload = tap_set_offload,
    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
    .set_vnet_le = tap_set_vnet_le,
    .set_vnet_be = tap_set_vnet_be,
    .set_steering_ebpf = tap_set_steering_ebpf,
};
 385
 386static TAPState *net_tap_fd_init(NetClientState *peer,
 387                                 const char *model,
 388                                 const char *name,
 389                                 int fd,
 390                                 int vnet_hdr)
 391{
 392    NetClientState *nc;
 393    TAPState *s;
 394
 395    nc = qemu_new_net_client(&net_tap_info, peer, model, name);
 396
 397    s = DO_UPCAST(TAPState, nc, nc);
 398
 399    s->fd = fd;
 400    s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
 401    s->using_vnet_hdr = false;
 402    s->has_ufo = tap_probe_has_ufo(s->fd);
 403    s->enabled = true;
 404    tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
 405    /*
 406     * Make sure host header length is set correctly in tap:
 407     * it might have been modified by another instance of qemu.
 408     */
 409    if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) {
 410        tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len);
 411    }
 412    tap_read_poll(s, true);
 413    s->vhost_net = NULL;
 414
 415    s->exit.notify = tap_exit_notify;
 416    qemu_add_exit_notifier(&s->exit);
 417
 418    return s;
 419}
 420
 421static void launch_script(const char *setup_script, const char *ifname,
 422                          int fd, Error **errp)
 423{
 424    int pid, status;
 425    char *args[3];
 426    char **parg;
 427
 428    /* try to launch network script */
 429    pid = fork();
 430    if (pid < 0) {
 431        error_setg_errno(errp, errno, "could not launch network script %s",
 432                         setup_script);
 433        return;
 434    }
 435    if (pid == 0) {
 436        int open_max = sysconf(_SC_OPEN_MAX), i;
 437
 438        for (i = 3; i < open_max; i++) {
 439            if (i != fd) {
 440                close(i);
 441            }
 442        }
 443        parg = args;
 444        *parg++ = (char *)setup_script;
 445        *parg++ = (char *)ifname;
 446        *parg = NULL;
 447        execv(setup_script, args);
 448        _exit(1);
 449    } else {
 450        while (waitpid(pid, &status, 0) != pid) {
 451            /* loop */
 452        }
 453
 454        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
 455            return;
 456        }
 457        error_setg(errp, "network script %s failed with status %d",
 458                   setup_script, status);
 459    }
 460}
 461
 462static int recv_fd(int c)
 463{
 464    int fd;
 465    uint8_t msgbuf[CMSG_SPACE(sizeof(fd))];
 466    struct msghdr msg = {
 467        .msg_control = msgbuf,
 468        .msg_controllen = sizeof(msgbuf),
 469    };
 470    struct cmsghdr *cmsg;
 471    struct iovec iov;
 472    uint8_t req[1];
 473    ssize_t len;
 474
 475    cmsg = CMSG_FIRSTHDR(&msg);
 476    cmsg->cmsg_level = SOL_SOCKET;
 477    cmsg->cmsg_type = SCM_RIGHTS;
 478    cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
 479    msg.msg_controllen = cmsg->cmsg_len;
 480
 481    iov.iov_base = req;
 482    iov.iov_len = sizeof(req);
 483
 484    msg.msg_iov = &iov;
 485    msg.msg_iovlen = 1;
 486
 487    len = recvmsg(c, &msg, 0);
 488    if (len > 0) {
 489        memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
 490        return fd;
 491    }
 492
 493    return len;
 494}
 495
 496static int net_bridge_run_helper(const char *helper, const char *bridge,
 497                                 Error **errp)
 498{
 499    sigset_t oldmask, mask;
 500    g_autofree char *default_helper = NULL;
 501    int pid, status;
 502    char *args[5];
 503    char **parg;
 504    int sv[2];
 505
 506    sigemptyset(&mask);
 507    sigaddset(&mask, SIGCHLD);
 508    sigprocmask(SIG_BLOCK, &mask, &oldmask);
 509
 510    if (!helper) {
 511        helper = default_helper = get_relocated_path(DEFAULT_BRIDGE_HELPER);
 512    }
 513
 514    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
 515        error_setg_errno(errp, errno, "socketpair() failed");
 516        return -1;
 517    }
 518
 519    /* try to launch bridge helper */
 520    pid = fork();
 521    if (pid < 0) {
 522        error_setg_errno(errp, errno, "Can't fork bridge helper");
 523        return -1;
 524    }
 525    if (pid == 0) {
 526        int open_max = sysconf(_SC_OPEN_MAX), i;
 527        char *fd_buf = NULL;
 528        char *br_buf = NULL;
 529        char *helper_cmd = NULL;
 530
 531        for (i = 3; i < open_max; i++) {
 532            if (i != sv[1]) {
 533                close(i);
 534            }
 535        }
 536
 537        fd_buf = g_strdup_printf("%s%d", "--fd=", sv[1]);
 538
 539        if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
 540            /* assume helper is a command */
 541
 542            if (strstr(helper, "--br=") == NULL) {
 543                br_buf = g_strdup_printf("%s%s", "--br=", bridge);
 544            }
 545
 546            helper_cmd = g_strdup_printf("%s %s %s %s", helper,
 547                            "--use-vnet", fd_buf, br_buf ? br_buf : "");
 548
 549            parg = args;
 550            *parg++ = (char *)"sh";
 551            *parg++ = (char *)"-c";
 552            *parg++ = helper_cmd;
 553            *parg++ = NULL;
 554
 555            execv("/bin/sh", args);
 556            g_free(helper_cmd);
 557        } else {
 558            /* assume helper is just the executable path name */
 559
 560            br_buf = g_strdup_printf("%s%s", "--br=", bridge);
 561
 562            parg = args;
 563            *parg++ = (char *)helper;
 564            *parg++ = (char *)"--use-vnet";
 565            *parg++ = fd_buf;
 566            *parg++ = br_buf;
 567            *parg++ = NULL;
 568
 569            execv(helper, args);
 570        }
 571        g_free(fd_buf);
 572        g_free(br_buf);
 573        _exit(1);
 574
 575    } else {
 576        int fd;
 577        int saved_errno;
 578
 579        close(sv[1]);
 580
 581        do {
 582            fd = recv_fd(sv[0]);
 583        } while (fd == -1 && errno == EINTR);
 584        saved_errno = errno;
 585
 586        close(sv[0]);
 587
 588        while (waitpid(pid, &status, 0) != pid) {
 589            /* loop */
 590        }
 591        sigprocmask(SIG_SETMASK, &oldmask, NULL);
 592        if (fd < 0) {
 593            error_setg_errno(errp, saved_errno,
 594                             "failed to recv file descriptor");
 595            return -1;
 596        }
 597        if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
 598            error_setg(errp, "bridge helper failed");
 599            return -1;
 600        }
 601        return fd;
 602    }
 603}
 604
 605int net_init_bridge(const Netdev *netdev, const char *name,
 606                    NetClientState *peer, Error **errp)
 607{
 608    const NetdevBridgeOptions *bridge;
 609    const char *helper, *br;
 610    TAPState *s;
 611    int fd, vnet_hdr;
 612
 613    assert(netdev->type == NET_CLIENT_DRIVER_BRIDGE);
 614    bridge = &netdev->u.bridge;
 615    helper = bridge->has_helper ? bridge->helper : NULL;
 616    br     = bridge->has_br     ? bridge->br     : DEFAULT_BRIDGE_INTERFACE;
 617
 618    fd = net_bridge_run_helper(helper, br, errp);
 619    if (fd == -1) {
 620        return -1;
 621    }
 622
 623    qemu_set_nonblock(fd);
 624    vnet_hdr = tap_probe_vnet_hdr(fd, errp);
 625    if (vnet_hdr < 0) {
 626        close(fd);
 627        return -1;
 628    }
 629    s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
 630
 631    snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
 632             br);
 633
 634    return 0;
 635}
 636
 637static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
 638                        const char *setup_script, char *ifname,
 639                        size_t ifname_sz, int mq_required, Error **errp)
 640{
 641    Error *err = NULL;
 642    int fd, vnet_hdr_required;
 643
 644    if (tap->has_vnet_hdr) {
 645        *vnet_hdr = tap->vnet_hdr;
 646        vnet_hdr_required = *vnet_hdr;
 647    } else {
 648        *vnet_hdr = 1;
 649        vnet_hdr_required = 0;
 650    }
 651
 652    TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required,
 653                      mq_required, errp));
 654    if (fd < 0) {
 655        return -1;
 656    }
 657
 658    if (setup_script &&
 659        setup_script[0] != '\0' &&
 660        strcmp(setup_script, "no") != 0) {
 661        launch_script(setup_script, ifname, fd, &err);
 662        if (err) {
 663            error_propagate(errp, err);
 664            close(fd);
 665            return -1;
 666        }
 667    }
 668
 669    return fd;
 670}
 671
 672#define MAX_TAP_QUEUES 1024
 673
 674static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
 675                             const char *model, const char *name,
 676                             const char *ifname, const char *script,
 677                             const char *downscript, const char *vhostfdname,
 678                             int vnet_hdr, int fd, Error **errp)
 679{
 680    Error *err = NULL;
 681    TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
 682    int vhostfd;
 683
 684    tap_set_sndbuf(s->fd, tap, &err);
 685    if (err) {
 686        error_propagate(errp, err);
 687        return;
 688    }
 689
 690    if (tap->has_fd || tap->has_fds) {
 691        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
 692    } else if (tap->has_helper) {
 693        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s",
 694                 tap->helper);
 695    } else {
 696        snprintf(s->nc.info_str, sizeof(s->nc.info_str),
 697                 "ifname=%s,script=%s,downscript=%s", ifname, script,
 698                 downscript);
 699
 700        if (strcmp(downscript, "no") != 0) {
 701            snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
 702            snprintf(s->down_script_arg, sizeof(s->down_script_arg),
 703                     "%s", ifname);
 704        }
 705    }
 706
 707    if (tap->has_vhost ? tap->vhost :
 708        vhostfdname || (tap->has_vhostforce && tap->vhostforce)) {
 709        VhostNetOptions options;
 710
 711        options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
 712        options.net_backend = &s->nc;
 713        if (tap->has_poll_us) {
 714            options.busyloop_timeout = tap->poll_us;
 715        } else {
 716            options.busyloop_timeout = 0;
 717        }
 718
 719        if (vhostfdname) {
 720            int ret;
 721
 722            vhostfd = monitor_fd_param(monitor_cur(), vhostfdname, &err);
 723            if (vhostfd == -1) {
 724                if (tap->has_vhostforce && tap->vhostforce) {
 725                    error_propagate(errp, err);
 726                } else {
 727                    warn_report_err(err);
 728                }
 729                return;
 730            }
 731            ret = qemu_try_set_nonblock(vhostfd);
 732            if (ret < 0) {
 733                error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
 734                                 name, fd);
 735                return;
 736            }
 737        } else {
 738            vhostfd = open("/dev/vhost-net", O_RDWR);
 739            if (vhostfd < 0) {
 740                if (tap->has_vhostforce && tap->vhostforce) {
 741                    error_setg_errno(errp, errno,
 742                                     "tap: open vhost char device failed");
 743                } else {
 744                    warn_report("tap: open vhost char device failed: %s",
 745                                strerror(errno));
 746                }
 747                return;
 748            }
 749            qemu_set_nonblock(vhostfd);
 750        }
 751        options.opaque = (void *)(uintptr_t)vhostfd;
 752        options.nvqs = 2;
 753
 754        s->vhost_net = vhost_net_init(&options);
 755        if (!s->vhost_net) {
 756            if (tap->has_vhostforce && tap->vhostforce) {
 757                error_setg(errp, VHOST_NET_INIT_FAILED);
 758            } else {
 759                warn_report(VHOST_NET_INIT_FAILED);
 760            }
 761            return;
 762        }
 763    } else if (vhostfdname) {
 764        error_setg(errp, "vhostfd(s)= is not valid without vhost");
 765    }
 766}
 767
 768static int get_fds(char *str, char *fds[], int max)
 769{
 770    char *ptr = str, *this;
 771    size_t len = strlen(str);
 772    int i = 0;
 773
 774    while (i < max && ptr < str + len) {
 775        this = strchr(ptr, ':');
 776
 777        if (this == NULL) {
 778            fds[i] = g_strdup(ptr);
 779        } else {
 780            fds[i] = g_strndup(ptr, this - ptr);
 781        }
 782
 783        i++;
 784        if (this == NULL) {
 785            break;
 786        } else {
 787            ptr = this + 1;
 788        }
 789    }
 790
 791    return i;
 792}
 793
 794int net_init_tap(const Netdev *netdev, const char *name,
 795                 NetClientState *peer, Error **errp)
 796{
 797    const NetdevTapOptions *tap;
 798    int fd, vnet_hdr = 0, i = 0, queues;
 799    /* for the no-fd, no-helper case */
 800    const char *script;
 801    const char *downscript;
 802    Error *err = NULL;
 803    const char *vhostfdname;
 804    char ifname[128];
 805    int ret = 0;
 806
 807    assert(netdev->type == NET_CLIENT_DRIVER_TAP);
 808    tap = &netdev->u.tap;
 809    queues = tap->has_queues ? tap->queues : 1;
 810    vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL;
 811    script = tap->has_script ? tap->script : NULL;
 812    downscript = tap->has_downscript ? tap->downscript : NULL;
 813
 814    /* QEMU hubs do not support multiqueue tap, in this case peer is set.
 815     * For -netdev, peer is always NULL. */
 816    if (peer && (tap->has_queues || tap->has_fds || tap->has_vhostfds)) {
 817        error_setg(errp, "Multiqueue tap cannot be used with hubs");
 818        return -1;
 819    }
 820
 821    if (tap->has_fd) {
 822        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
 823            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
 824            tap->has_fds || tap->has_vhostfds) {
 825            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
 826                       "helper=, queues=, fds=, and vhostfds= "
 827                       "are invalid with fd=");
 828            return -1;
 829        }
 830
 831        fd = monitor_fd_param(monitor_cur(), tap->fd, errp);
 832        if (fd == -1) {
 833            return -1;
 834        }
 835
 836        ret = qemu_try_set_nonblock(fd);
 837        if (ret < 0) {
 838            error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
 839                             name, fd);
 840            close(fd);
 841            return -1;
 842        }
 843
 844        vnet_hdr = tap_probe_vnet_hdr(fd, errp);
 845        if (vnet_hdr < 0) {
 846            close(fd);
 847            return -1;
 848        }
 849
 850        net_init_tap_one(tap, peer, "tap", name, NULL,
 851                         script, downscript,
 852                         vhostfdname, vnet_hdr, fd, &err);
 853        if (err) {
 854            error_propagate(errp, err);
 855            close(fd);
 856            return -1;
 857        }
 858    } else if (tap->has_fds) {
 859        char **fds;
 860        char **vhost_fds;
 861        int nfds = 0, nvhosts = 0;
 862
 863        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
 864            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
 865            tap->has_vhostfd) {
 866            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
 867                       "helper=, queues=, and vhostfd= "
 868                       "are invalid with fds=");
 869            return -1;
 870        }
 871
 872        fds = g_new0(char *, MAX_TAP_QUEUES);
 873        vhost_fds = g_new0(char *, MAX_TAP_QUEUES);
 874
 875        nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES);
 876        if (tap->has_vhostfds) {
 877            nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES);
 878            if (nfds != nvhosts) {
 879                error_setg(errp, "The number of fds passed does not match "
 880                           "the number of vhostfds passed");
 881                ret = -1;
 882                goto free_fail;
 883            }
 884        }
 885
 886        for (i = 0; i < nfds; i++) {
 887            fd = monitor_fd_param(monitor_cur(), fds[i], errp);
 888            if (fd == -1) {
 889                ret = -1;
 890                goto free_fail;
 891            }
 892
 893            ret = qemu_try_set_nonblock(fd);
 894            if (ret < 0) {
 895                error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
 896                                 name, fd);
 897                goto free_fail;
 898            }
 899
 900            if (i == 0) {
 901                vnet_hdr = tap_probe_vnet_hdr(fd, errp);
 902                if (vnet_hdr < 0) {
 903                    goto free_fail;
 904                }
 905            } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) {
 906                error_setg(errp,
 907                           "vnet_hdr not consistent across given tap fds");
 908                ret = -1;
 909                goto free_fail;
 910            }
 911
 912            net_init_tap_one(tap, peer, "tap", name, ifname,
 913                             script, downscript,
 914                             tap->has_vhostfds ? vhost_fds[i] : NULL,
 915                             vnet_hdr, fd, &err);
 916            if (err) {
 917                error_propagate(errp, err);
 918                ret = -1;
 919                goto free_fail;
 920            }
 921        }
 922
 923free_fail:
 924        for (i = 0; i < nvhosts; i++) {
 925            g_free(vhost_fds[i]);
 926        }
 927        for (i = 0; i < nfds; i++) {
 928            g_free(fds[i]);
 929        }
 930        g_free(fds);
 931        g_free(vhost_fds);
 932        return ret;
 933    } else if (tap->has_helper) {
 934        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
 935            tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) {
 936            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
 937                       "queues=, and vhostfds= are invalid with helper=");
 938            return -1;
 939        }
 940
 941        fd = net_bridge_run_helper(tap->helper,
 942                                   tap->has_br ?
 943                                   tap->br : DEFAULT_BRIDGE_INTERFACE,
 944                                   errp);
 945        if (fd == -1) {
 946            return -1;
 947        }
 948
 949        qemu_set_nonblock(fd);
 950        vnet_hdr = tap_probe_vnet_hdr(fd, errp);
 951        if (vnet_hdr < 0) {
 952            close(fd);
 953            return -1;
 954        }
 955
 956        net_init_tap_one(tap, peer, "bridge", name, ifname,
 957                         script, downscript, vhostfdname,
 958                         vnet_hdr, fd, &err);
 959        if (err) {
 960            error_propagate(errp, err);
 961            close(fd);
 962            return -1;
 963        }
 964    } else {
 965        g_autofree char *default_script = NULL;
 966        g_autofree char *default_downscript = NULL;
 967        if (tap->has_vhostfds) {
 968            error_setg(errp, "vhostfds= is invalid if fds= wasn't specified");
 969            return -1;
 970        }
 971
 972        if (!script) {
 973            script = default_script = get_relocated_path(DEFAULT_NETWORK_SCRIPT);
 974        }
 975        if (!downscript) {
 976            downscript = default_downscript =
 977                                 get_relocated_path(DEFAULT_NETWORK_DOWN_SCRIPT);
 978        }
 979
 980        if (tap->has_ifname) {
 981            pstrcpy(ifname, sizeof ifname, tap->ifname);
 982        } else {
 983            ifname[0] = '\0';
 984        }
 985
 986        for (i = 0; i < queues; i++) {
 987            fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
 988                              ifname, sizeof ifname, queues > 1, errp);
 989            if (fd == -1) {
 990                return -1;
 991            }
 992
 993            if (queues > 1 && i == 0 && !tap->has_ifname) {
 994                if (tap_fd_get_ifname(fd, ifname)) {
 995                    error_setg(errp, "Fail to get ifname");
 996                    close(fd);
 997                    return -1;
 998                }
 999            }
1000
1001            net_init_tap_one(tap, peer, "tap", name, ifname,
1002                             i >= 1 ? "no" : script,
1003                             i >= 1 ? "no" : downscript,
1004                             vhostfdname, vnet_hdr, fd, &err);
1005            if (err) {
1006                error_propagate(errp, err);
1007                close(fd);
1008                return -1;
1009            }
1010        }
1011    }
1012
1013    return 0;
1014}
1015
1016VHostNetState *tap_get_vhost_net(NetClientState *nc)
1017{
1018    TAPState *s = DO_UPCAST(TAPState, nc, nc);
1019    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
1020    return s->vhost_net;
1021}
1022
1023int tap_enable(NetClientState *nc)
1024{
1025    TAPState *s = DO_UPCAST(TAPState, nc, nc);
1026    int ret;
1027
1028    if (s->enabled) {
1029        return 0;
1030    } else {
1031        ret = tap_fd_enable(s->fd);
1032        if (ret == 0) {
1033            s->enabled = true;
1034            tap_update_fd_handler(s);
1035        }
1036        return ret;
1037    }
1038}
1039
1040int tap_disable(NetClientState *nc)
1041{
1042    TAPState *s = DO_UPCAST(TAPState, nc, nc);
1043    int ret;
1044
1045    if (s->enabled == 0) {
1046        return 0;
1047    } else {
1048        ret = tap_fd_disable(s->fd);
1049        if (ret == 0) {
1050            qemu_purge_queued_packets(nc);
1051            s->enabled = false;
1052            tap_update_fd_handler(s);
1053        }
1054        return ret;
1055    }
1056}
1057