qemu/net/tap.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 * Copyright (c) 2009 Red Hat, Inc.
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "tap_int.h"
  28
  29
  30#include <sys/ioctl.h>
  31#include <sys/wait.h>
  32#include <sys/socket.h>
  33#include <net/if.h>
  34
  35#include "net/eth.h"
  36#include "net/net.h"
  37#include "clients.h"
  38#include "monitor/monitor.h"
  39#include "sysemu/sysemu.h"
  40#include "qapi/error.h"
  41#include "qemu-common.h"
  42#include "qemu/cutils.h"
  43#include "qemu/error-report.h"
  44#include "qemu/main-loop.h"
  45#include "qemu/sockets.h"
  46
  47#include "net/tap.h"
  48
  49#include "net/vhost_net.h"
  50
  51typedef struct TAPState {
  52    NetClientState nc;
  53    int fd;
  54    char down_script[1024];
  55    char down_script_arg[128];
  56    uint8_t buf[NET_BUFSIZE];
  57    bool read_poll;
  58    bool write_poll;
  59    bool using_vnet_hdr;
  60    bool has_ufo;
  61    bool enabled;
  62    VHostNetState *vhost_net;
  63    unsigned host_vnet_hdr_len;
  64    Notifier exit;
  65} TAPState;
  66
  67static void launch_script(const char *setup_script, const char *ifname,
  68                          int fd, Error **errp);
  69
  70static void tap_send(void *opaque);
  71static void tap_writable(void *opaque);
  72
  73static void tap_update_fd_handler(TAPState *s)
  74{
  75    qemu_set_fd_handler(s->fd,
  76                        s->read_poll && s->enabled ? tap_send : NULL,
  77                        s->write_poll && s->enabled ? tap_writable : NULL,
  78                        s);
  79}
  80
  81static void tap_read_poll(TAPState *s, bool enable)
  82{
  83    s->read_poll = enable;
  84    tap_update_fd_handler(s);
  85}
  86
  87static void tap_write_poll(TAPState *s, bool enable)
  88{
  89    s->write_poll = enable;
  90    tap_update_fd_handler(s);
  91}
  92
  93static void tap_writable(void *opaque)
  94{
  95    TAPState *s = opaque;
  96
  97    tap_write_poll(s, false);
  98
  99    qemu_flush_queued_packets(&s->nc);
 100}
 101
 102static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
 103{
 104    ssize_t len;
 105
 106    do {
 107        len = writev(s->fd, iov, iovcnt);
 108    } while (len == -1 && errno == EINTR);
 109
 110    if (len == -1 && errno == EAGAIN) {
 111        tap_write_poll(s, true);
 112        return 0;
 113    }
 114
 115    return len;
 116}
 117
 118static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
 119                               int iovcnt)
 120{
 121    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 122    const struct iovec *iovp = iov;
 123    struct iovec iov_copy[iovcnt + 1];
 124    struct virtio_net_hdr_mrg_rxbuf hdr = { };
 125
 126    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 127        iov_copy[0].iov_base = &hdr;
 128        iov_copy[0].iov_len =  s->host_vnet_hdr_len;
 129        memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
 130        iovp = iov_copy;
 131        iovcnt++;
 132    }
 133
 134    return tap_write_packet(s, iovp, iovcnt);
 135}
 136
 137static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size)
 138{
 139    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 140    struct iovec iov[2];
 141    int iovcnt = 0;
 142    struct virtio_net_hdr_mrg_rxbuf hdr = { };
 143
 144    if (s->host_vnet_hdr_len) {
 145        iov[iovcnt].iov_base = &hdr;
 146        iov[iovcnt].iov_len  = s->host_vnet_hdr_len;
 147        iovcnt++;
 148    }
 149
 150    iov[iovcnt].iov_base = (char *)buf;
 151    iov[iovcnt].iov_len  = size;
 152    iovcnt++;
 153
 154    return tap_write_packet(s, iov, iovcnt);
 155}
 156
 157static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 158{
 159    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 160    struct iovec iov[1];
 161
 162    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 163        return tap_receive_raw(nc, buf, size);
 164    }
 165
 166    iov[0].iov_base = (char *)buf;
 167    iov[0].iov_len  = size;
 168
 169    return tap_write_packet(s, iov, 1);
 170}
 171
 172#ifndef __sun__
 173ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
 174{
 175    return read(tapfd, buf, maxlen);
 176}
 177#endif
 178
 179static void tap_send_completed(NetClientState *nc, ssize_t len)
 180{
 181    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 182    tap_read_poll(s, true);
 183}
 184
 185static void tap_send(void *opaque)
 186{
 187    TAPState *s = opaque;
 188    int size;
 189    int packets = 0;
 190
 191    while (true) {
 192        uint8_t *buf = s->buf;
 193        uint8_t min_pkt[ETH_ZLEN];
 194        size_t min_pktsz = sizeof(min_pkt);
 195
 196        size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
 197        if (size <= 0) {
 198            break;
 199        }
 200
 201        if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 202            buf  += s->host_vnet_hdr_len;
 203            size -= s->host_vnet_hdr_len;
 204        }
 205
 206        if (net_peer_needs_padding(&s->nc)) {
 207            if (eth_pad_short_frame(min_pkt, &min_pktsz, buf, size)) {
 208                buf = min_pkt;
 209                size = min_pktsz;
 210            }
 211        }
 212
 213        size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
 214        if (size == 0) {
 215            tap_read_poll(s, false);
 216            break;
 217        } else if (size < 0) {
 218            break;
 219        }
 220
 221        /*
 222         * When the host keeps receiving more packets while tap_send() is
 223         * running we can hog the QEMU global mutex.  Limit the number of
 224         * packets that are processed per tap_send() callback to prevent
 225         * stalling the guest.
 226         */
 227        packets++;
 228        if (packets >= 50) {
 229            break;
 230        }
 231    }
 232}
 233
 234static bool tap_has_ufo(NetClientState *nc)
 235{
 236    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 237
 238    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 239
 240    return s->has_ufo;
 241}
 242
 243static bool tap_has_vnet_hdr(NetClientState *nc)
 244{
 245    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 246
 247    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 248
 249    return !!s->host_vnet_hdr_len;
 250}
 251
 252static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
 253{
 254    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 255
 256    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 257
 258    return !!tap_probe_vnet_hdr_len(s->fd, len);
 259}
 260
 261static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
 262{
 263    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 264
 265    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 266    assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
 267           len == sizeof(struct virtio_net_hdr) ||
 268           len == sizeof(struct virtio_net_hdr_v1_hash));
 269
 270    tap_fd_set_vnet_hdr_len(s->fd, len);
 271    s->host_vnet_hdr_len = len;
 272}
 273
 274static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
 275{
 276    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 277
 278    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 279    assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
 280
 281    s->using_vnet_hdr = using_vnet_hdr;
 282}
 283
 284static int tap_set_vnet_le(NetClientState *nc, bool is_le)
 285{
 286    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 287
 288    return tap_fd_set_vnet_le(s->fd, is_le);
 289}
 290
 291static int tap_set_vnet_be(NetClientState *nc, bool is_be)
 292{
 293    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 294
 295    return tap_fd_set_vnet_be(s->fd, is_be);
 296}
 297
 298static void tap_set_offload(NetClientState *nc, int csum, int tso4,
 299                     int tso6, int ecn, int ufo)
 300{
 301    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 302    if (s->fd < 0) {
 303        return;
 304    }
 305
 306    tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
 307}
 308
 309static void tap_exit_notify(Notifier *notifier, void *data)
 310{
 311    TAPState *s = container_of(notifier, TAPState, exit);
 312    Error *err = NULL;
 313
 314    if (s->down_script[0]) {
 315        launch_script(s->down_script, s->down_script_arg, s->fd, &err);
 316        if (err) {
 317            error_report_err(err);
 318        }
 319    }
 320}
 321
 322static void tap_cleanup(NetClientState *nc)
 323{
 324    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 325
 326    if (s->vhost_net) {
 327        vhost_net_cleanup(s->vhost_net);
 328        g_free(s->vhost_net);
 329        s->vhost_net = NULL;
 330    }
 331
 332    qemu_purge_queued_packets(nc);
 333
 334    tap_exit_notify(&s->exit, NULL);
 335    qemu_remove_exit_notifier(&s->exit);
 336
 337    tap_read_poll(s, false);
 338    tap_write_poll(s, false);
 339    close(s->fd);
 340    s->fd = -1;
 341}
 342
 343static void tap_poll(NetClientState *nc, bool enable)
 344{
 345    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 346    tap_read_poll(s, enable);
 347    tap_write_poll(s, enable);
 348}
 349
 350static bool tap_set_steering_ebpf(NetClientState *nc, int prog_fd)
 351{
 352    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 353    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 354
 355    return tap_fd_set_steering_ebpf(s->fd, prog_fd) == 0;
 356}
 357
 358int tap_get_fd(NetClientState *nc)
 359{
 360    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 361    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 362    return s->fd;
 363}
 364
 365/* fd support */
 366
 367static NetClientInfo net_tap_info = {
 368    .type = NET_CLIENT_DRIVER_TAP,
 369    .size = sizeof(TAPState),
 370    .receive = tap_receive,
 371    .receive_raw = tap_receive_raw,
 372    .receive_iov = tap_receive_iov,
 373    .poll = tap_poll,
 374    .cleanup = tap_cleanup,
 375    .has_ufo = tap_has_ufo,
 376    .has_vnet_hdr = tap_has_vnet_hdr,
 377    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
 378    .using_vnet_hdr = tap_using_vnet_hdr,
 379    .set_offload = tap_set_offload,
 380    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
 381    .set_vnet_le = tap_set_vnet_le,
 382    .set_vnet_be = tap_set_vnet_be,
 383    .set_steering_ebpf = tap_set_steering_ebpf,
 384};
 385
 386static TAPState *net_tap_fd_init(NetClientState *peer,
 387                                 const char *model,
 388                                 const char *name,
 389                                 int fd,
 390                                 int vnet_hdr)
 391{
 392    NetClientState *nc;
 393    TAPState *s;
 394
 395    nc = qemu_new_net_client(&net_tap_info, peer, model, name);
 396
 397    s = DO_UPCAST(TAPState, nc, nc);
 398
 399    s->fd = fd;
 400    s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
 401    s->using_vnet_hdr = false;
 402    s->has_ufo = tap_probe_has_ufo(s->fd);
 403    s->enabled = true;
 404    tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
 405    /*
 406     * Make sure host header length is set correctly in tap:
 407     * it might have been modified by another instance of qemu.
 408     */
 409    if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) {
 410        tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len);
 411    }
 412    tap_read_poll(s, true);
 413    s->vhost_net = NULL;
 414
 415    s->exit.notify = tap_exit_notify;
 416    qemu_add_exit_notifier(&s->exit);
 417
 418    return s;
 419}
 420
 421static void launch_script(const char *setup_script, const char *ifname,
 422                          int fd, Error **errp)
 423{
 424    int pid, status;
 425    char *args[3];
 426    char **parg;
 427
 428    /* try to launch network script */
 429    pid = fork();
 430    if (pid < 0) {
 431        error_setg_errno(errp, errno, "could not launch network script %s",
 432                         setup_script);
 433        return;
 434    }
 435    if (pid == 0) {
 436        int open_max = sysconf(_SC_OPEN_MAX), i;
 437
 438        for (i = 3; i < open_max; i++) {
 439            if (i != fd) {
 440                close(i);
 441            }
 442        }
 443        parg = args;
 444        *parg++ = (char *)setup_script;
 445        *parg++ = (char *)ifname;
 446        *parg = NULL;
 447        execv(setup_script, args);
 448        _exit(1);
 449    } else {
 450        while (waitpid(pid, &status, 0) != pid) {
 451            /* loop */
 452        }
 453
 454        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
 455            return;
 456        }
 457        error_setg(errp, "network script %s failed with status %d",
 458                   setup_script, status);
 459    }
 460}
 461
 462static int recv_fd(int c)
 463{
 464    int fd;
 465    uint8_t msgbuf[CMSG_SPACE(sizeof(fd))];
 466    struct msghdr msg = {
 467        .msg_control = msgbuf,
 468        .msg_controllen = sizeof(msgbuf),
 469    };
 470    struct cmsghdr *cmsg;
 471    struct iovec iov;
 472    uint8_t req[1];
 473    ssize_t len;
 474
 475    cmsg = CMSG_FIRSTHDR(&msg);
 476    cmsg->cmsg_level = SOL_SOCKET;
 477    cmsg->cmsg_type = SCM_RIGHTS;
 478    cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
 479    msg.msg_controllen = cmsg->cmsg_len;
 480
 481    iov.iov_base = req;
 482    iov.iov_len = sizeof(req);
 483
 484    msg.msg_iov = &iov;
 485    msg.msg_iovlen = 1;
 486
 487    len = recvmsg(c, &msg, 0);
 488    if (len > 0) {
 489        memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
 490        return fd;
 491    }
 492
 493    return len;
 494}
 495
 496static int net_bridge_run_helper(const char *helper, const char *bridge,
 497                                 Error **errp)
 498{
 499    sigset_t oldmask, mask;
 500    g_autofree char *default_helper = NULL;
 501    int pid, status;
 502    char *args[5];
 503    char **parg;
 504    int sv[2];
 505
 506    sigemptyset(&mask);
 507    sigaddset(&mask, SIGCHLD);
 508    sigprocmask(SIG_BLOCK, &mask, &oldmask);
 509
 510    if (!helper) {
 511        helper = default_helper = get_relocated_path(DEFAULT_BRIDGE_HELPER);
 512    }
 513
 514    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
 515        error_setg_errno(errp, errno, "socketpair() failed");
 516        return -1;
 517    }
 518
 519    /* try to launch bridge helper */
 520    pid = fork();
 521    if (pid < 0) {
 522        error_setg_errno(errp, errno, "Can't fork bridge helper");
 523        return -1;
 524    }
 525    if (pid == 0) {
 526        int open_max = sysconf(_SC_OPEN_MAX), i;
 527        char *fd_buf = NULL;
 528        char *br_buf = NULL;
 529        char *helper_cmd = NULL;
 530
 531        for (i = 3; i < open_max; i++) {
 532            if (i != sv[1]) {
 533                close(i);
 534            }
 535        }
 536
 537        fd_buf = g_strdup_printf("%s%d", "--fd=", sv[1]);
 538
 539        if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
 540            /* assume helper is a command */
 541
 542            if (strstr(helper, "--br=") == NULL) {
 543                br_buf = g_strdup_printf("%s%s", "--br=", bridge);
 544            }
 545
 546            helper_cmd = g_strdup_printf("%s %s %s %s", helper,
 547                            "--use-vnet", fd_buf, br_buf ? br_buf : "");
 548
 549            parg = args;
 550            *parg++ = (char *)"sh";
 551            *parg++ = (char *)"-c";
 552            *parg++ = helper_cmd;
 553            *parg++ = NULL;
 554
 555            execv("/bin/sh", args);
 556            g_free(helper_cmd);
 557        } else {
 558            /* assume helper is just the executable path name */
 559
 560            br_buf = g_strdup_printf("%s%s", "--br=", bridge);
 561
 562            parg = args;
 563            *parg++ = (char *)helper;
 564            *parg++ = (char *)"--use-vnet";
 565            *parg++ = fd_buf;
 566            *parg++ = br_buf;
 567            *parg++ = NULL;
 568
 569            execv(helper, args);
 570        }
 571        g_free(fd_buf);
 572        g_free(br_buf);
 573        _exit(1);
 574
 575    } else {
 576        int fd;
 577        int saved_errno;
 578
 579        close(sv[1]);
 580
 581        do {
 582            fd = recv_fd(sv[0]);
 583        } while (fd == -1 && errno == EINTR);
 584        saved_errno = errno;
 585
 586        close(sv[0]);
 587
 588        while (waitpid(pid, &status, 0) != pid) {
 589            /* loop */
 590        }
 591        sigprocmask(SIG_SETMASK, &oldmask, NULL);
 592        if (fd < 0) {
 593            error_setg_errno(errp, saved_errno,
 594                             "failed to recv file descriptor");
 595            return -1;
 596        }
 597        if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
 598            error_setg(errp, "bridge helper failed");
 599            return -1;
 600        }
 601        return fd;
 602    }
 603}
 604
 605int net_init_bridge(const Netdev *netdev, const char *name,
 606                    NetClientState *peer, Error **errp)
 607{
 608    const NetdevBridgeOptions *bridge;
 609    const char *helper, *br;
 610    TAPState *s;
 611    int fd, vnet_hdr;
 612
 613    assert(netdev->type == NET_CLIENT_DRIVER_BRIDGE);
 614    bridge = &netdev->u.bridge;
 615    helper = bridge->has_helper ? bridge->helper : NULL;
 616    br     = bridge->has_br     ? bridge->br     : DEFAULT_BRIDGE_INTERFACE;
 617
 618    fd = net_bridge_run_helper(helper, br, errp);
 619    if (fd == -1) {
 620        return -1;
 621    }
 622
 623    qemu_set_nonblock(fd);
 624    vnet_hdr = tap_probe_vnet_hdr(fd, errp);
 625    if (vnet_hdr < 0) {
 626        close(fd);
 627        return -1;
 628    }
 629    s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
 630
 631    snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
 632             br);
 633
 634    return 0;
 635}
 636
 637static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
 638                        const char *setup_script, char *ifname,
 639                        size_t ifname_sz, int mq_required, Error **errp)
 640{
 641    Error *err = NULL;
 642    int fd, vnet_hdr_required;
 643
 644    if (tap->has_vnet_hdr) {
 645        *vnet_hdr = tap->vnet_hdr;
 646        vnet_hdr_required = *vnet_hdr;
 647    } else {
 648        *vnet_hdr = 1;
 649        vnet_hdr_required = 0;
 650    }
 651
 652    TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required,
 653                      mq_required, errp));
 654    if (fd < 0) {
 655        return -1;
 656    }
 657
 658    if (setup_script &&
 659        setup_script[0] != '\0' &&
 660        strcmp(setup_script, "no") != 0) {
 661        launch_script(setup_script, ifname, fd, &err);
 662        if (err) {
 663            error_propagate(errp, err);
 664            close(fd);
 665            return -1;
 666        }
 667    }
 668
 669    return fd;
 670}
 671
 672#define MAX_TAP_QUEUES 1024
 673
 674static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
 675                             const char *model, const char *name,
 676                             const char *ifname, const char *script,
 677                             const char *downscript, const char *vhostfdname,
 678                             int vnet_hdr, int fd, Error **errp)
 679{
 680    Error *err = NULL;
 681    TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
 682    int vhostfd;
 683
 684    tap_set_sndbuf(s->fd, tap, &err);
 685    if (err) {
 686        error_propagate(errp, err);
 687        return;
 688    }
 689
 690    if (tap->has_fd || tap->has_fds) {
 691        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
 692    } else if (tap->has_helper) {
 693        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s",
 694                 tap->helper);
 695    } else {
 696        snprintf(s->nc.info_str, sizeof(s->nc.info_str),
 697                 "ifname=%s,script=%s,downscript=%s", ifname, script,
 698                 downscript);
 699
 700        if (strcmp(downscript, "no") != 0) {
 701            snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
 702            snprintf(s->down_script_arg, sizeof(s->down_script_arg),
 703                     "%s", ifname);
 704        }
 705    }
 706
 707    if (tap->has_vhost ? tap->vhost :
 708        vhostfdname || (tap->has_vhostforce && tap->vhostforce)) {
 709        VhostNetOptions options;
 710
 711        options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
 712        options.net_backend = &s->nc;
 713        if (tap->has_poll_us) {
 714            options.busyloop_timeout = tap->poll_us;
 715        } else {
 716            options.busyloop_timeout = 0;
 717        }
 718
 719        if (vhostfdname) {
 720            int ret;
 721
 722            vhostfd = monitor_fd_param(monitor_cur(), vhostfdname, &err);
 723            if (vhostfd == -1) {
 724                if (tap->has_vhostforce && tap->vhostforce) {
 725                    error_propagate(errp, err);
 726                } else {
 727                    warn_report_err(err);
 728                }
 729                return;
 730            }
 731            ret = qemu_try_set_nonblock(vhostfd);
 732            if (ret < 0) {
 733                error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
 734                                 name, fd);
 735                return;
 736            }
 737        } else {
 738            vhostfd = open("/dev/vhost-net", O_RDWR);
 739            if (vhostfd < 0) {
 740                if (tap->has_vhostforce && tap->vhostforce) {
 741                    error_setg_errno(errp, errno,
 742                                     "tap: open vhost char device failed");
 743                } else {
 744                    warn_report("tap: open vhost char device failed: %s",
 745                                strerror(errno));
 746                }
 747                return;
 748            }
 749            qemu_set_nonblock(vhostfd);
 750        }
 751        options.opaque = (void *)(uintptr_t)vhostfd;
 752
 753        s->vhost_net = vhost_net_init(&options);
 754        if (!s->vhost_net) {
 755            if (tap->has_vhostforce && tap->vhostforce) {
 756                error_setg(errp, VHOST_NET_INIT_FAILED);
 757            } else {
 758                warn_report(VHOST_NET_INIT_FAILED);
 759            }
 760            return;
 761        }
 762    } else if (vhostfdname) {
 763        error_setg(errp, "vhostfd(s)= is not valid without vhost");
 764    }
 765}
 766
 767static int get_fds(char *str, char *fds[], int max)
 768{
 769    char *ptr = str, *this;
 770    size_t len = strlen(str);
 771    int i = 0;
 772
 773    while (i < max && ptr < str + len) {
 774        this = strchr(ptr, ':');
 775
 776        if (this == NULL) {
 777            fds[i] = g_strdup(ptr);
 778        } else {
 779            fds[i] = g_strndup(ptr, this - ptr);
 780        }
 781
 782        i++;
 783        if (this == NULL) {
 784            break;
 785        } else {
 786            ptr = this + 1;
 787        }
 788    }
 789
 790    return i;
 791}
 792
 793int net_init_tap(const Netdev *netdev, const char *name,
 794                 NetClientState *peer, Error **errp)
 795{
 796    const NetdevTapOptions *tap;
 797    int fd, vnet_hdr = 0, i = 0, queues;
 798    /* for the no-fd, no-helper case */
 799    const char *script;
 800    const char *downscript;
 801    Error *err = NULL;
 802    const char *vhostfdname;
 803    char ifname[128];
 804    int ret = 0;
 805
 806    assert(netdev->type == NET_CLIENT_DRIVER_TAP);
 807    tap = &netdev->u.tap;
 808    queues = tap->has_queues ? tap->queues : 1;
 809    vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL;
 810    script = tap->has_script ? tap->script : NULL;
 811    downscript = tap->has_downscript ? tap->downscript : NULL;
 812
 813    /* QEMU hubs do not support multiqueue tap, in this case peer is set.
 814     * For -netdev, peer is always NULL. */
 815    if (peer && (tap->has_queues || tap->has_fds || tap->has_vhostfds)) {
 816        error_setg(errp, "Multiqueue tap cannot be used with hubs");
 817        return -1;
 818    }
 819
 820    if (tap->has_fd) {
 821        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
 822            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
 823            tap->has_fds || tap->has_vhostfds) {
 824            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
 825                       "helper=, queues=, fds=, and vhostfds= "
 826                       "are invalid with fd=");
 827            return -1;
 828        }
 829
 830        fd = monitor_fd_param(monitor_cur(), tap->fd, errp);
 831        if (fd == -1) {
 832            return -1;
 833        }
 834
 835        ret = qemu_try_set_nonblock(fd);
 836        if (ret < 0) {
 837            error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
 838                             name, fd);
 839            close(fd);
 840            return -1;
 841        }
 842
 843        vnet_hdr = tap_probe_vnet_hdr(fd, errp);
 844        if (vnet_hdr < 0) {
 845            close(fd);
 846            return -1;
 847        }
 848
 849        net_init_tap_one(tap, peer, "tap", name, NULL,
 850                         script, downscript,
 851                         vhostfdname, vnet_hdr, fd, &err);
 852        if (err) {
 853            error_propagate(errp, err);
 854            close(fd);
 855            return -1;
 856        }
 857    } else if (tap->has_fds) {
 858        char **fds;
 859        char **vhost_fds;
 860        int nfds = 0, nvhosts = 0;
 861
 862        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
 863            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
 864            tap->has_vhostfd) {
 865            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
 866                       "helper=, queues=, and vhostfd= "
 867                       "are invalid with fds=");
 868            return -1;
 869        }
 870
 871        fds = g_new0(char *, MAX_TAP_QUEUES);
 872        vhost_fds = g_new0(char *, MAX_TAP_QUEUES);
 873
 874        nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES);
 875        if (tap->has_vhostfds) {
 876            nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES);
 877            if (nfds != nvhosts) {
 878                error_setg(errp, "The number of fds passed does not match "
 879                           "the number of vhostfds passed");
 880                ret = -1;
 881                goto free_fail;
 882            }
 883        }
 884
 885        for (i = 0; i < nfds; i++) {
 886            fd = monitor_fd_param(monitor_cur(), fds[i], errp);
 887            if (fd == -1) {
 888                ret = -1;
 889                goto free_fail;
 890            }
 891
 892            ret = qemu_try_set_nonblock(fd);
 893            if (ret < 0) {
 894                error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
 895                                 name, fd);
 896                goto free_fail;
 897            }
 898
 899            if (i == 0) {
 900                vnet_hdr = tap_probe_vnet_hdr(fd, errp);
 901                if (vnet_hdr < 0) {
 902                    goto free_fail;
 903                }
 904            } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) {
 905                error_setg(errp,
 906                           "vnet_hdr not consistent across given tap fds");
 907                ret = -1;
 908                goto free_fail;
 909            }
 910
 911            net_init_tap_one(tap, peer, "tap", name, ifname,
 912                             script, downscript,
 913                             tap->has_vhostfds ? vhost_fds[i] : NULL,
 914                             vnet_hdr, fd, &err);
 915            if (err) {
 916                error_propagate(errp, err);
 917                ret = -1;
 918                goto free_fail;
 919            }
 920        }
 921
 922free_fail:
 923        for (i = 0; i < nvhosts; i++) {
 924            g_free(vhost_fds[i]);
 925        }
 926        for (i = 0; i < nfds; i++) {
 927            g_free(fds[i]);
 928        }
 929        g_free(fds);
 930        g_free(vhost_fds);
 931        return ret;
 932    } else if (tap->has_helper) {
 933        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
 934            tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) {
 935            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
 936                       "queues=, and vhostfds= are invalid with helper=");
 937            return -1;
 938        }
 939
 940        fd = net_bridge_run_helper(tap->helper,
 941                                   tap->has_br ?
 942                                   tap->br : DEFAULT_BRIDGE_INTERFACE,
 943                                   errp);
 944        if (fd == -1) {
 945            return -1;
 946        }
 947
 948        qemu_set_nonblock(fd);
 949        vnet_hdr = tap_probe_vnet_hdr(fd, errp);
 950        if (vnet_hdr < 0) {
 951            close(fd);
 952            return -1;
 953        }
 954
 955        net_init_tap_one(tap, peer, "bridge", name, ifname,
 956                         script, downscript, vhostfdname,
 957                         vnet_hdr, fd, &err);
 958        if (err) {
 959            error_propagate(errp, err);
 960            close(fd);
 961            return -1;
 962        }
 963    } else {
 964        g_autofree char *default_script = NULL;
 965        g_autofree char *default_downscript = NULL;
 966        if (tap->has_vhostfds) {
 967            error_setg(errp, "vhostfds= is invalid if fds= wasn't specified");
 968            return -1;
 969        }
 970
 971        if (!script) {
 972            script = default_script = get_relocated_path(DEFAULT_NETWORK_SCRIPT);
 973        }
 974        if (!downscript) {
 975            downscript = default_downscript =
 976                                 get_relocated_path(DEFAULT_NETWORK_DOWN_SCRIPT);
 977        }
 978
 979        if (tap->has_ifname) {
 980            pstrcpy(ifname, sizeof ifname, tap->ifname);
 981        } else {
 982            ifname[0] = '\0';
 983        }
 984
 985        for (i = 0; i < queues; i++) {
 986            fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
 987                              ifname, sizeof ifname, queues > 1, errp);
 988            if (fd == -1) {
 989                return -1;
 990            }
 991
 992            if (queues > 1 && i == 0 && !tap->has_ifname) {
 993                if (tap_fd_get_ifname(fd, ifname)) {
 994                    error_setg(errp, "Fail to get ifname");
 995                    close(fd);
 996                    return -1;
 997                }
 998            }
 999
1000            net_init_tap_one(tap, peer, "tap", name, ifname,
1001                             i >= 1 ? "no" : script,
1002                             i >= 1 ? "no" : downscript,
1003                             vhostfdname, vnet_hdr, fd, &err);
1004            if (err) {
1005                error_propagate(errp, err);
1006                close(fd);
1007                return -1;
1008            }
1009        }
1010    }
1011
1012    return 0;
1013}
1014
1015VHostNetState *tap_get_vhost_net(NetClientState *nc)
1016{
1017    TAPState *s = DO_UPCAST(TAPState, nc, nc);
1018    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
1019    return s->vhost_net;
1020}
1021
1022int tap_enable(NetClientState *nc)
1023{
1024    TAPState *s = DO_UPCAST(TAPState, nc, nc);
1025    int ret;
1026
1027    if (s->enabled) {
1028        return 0;
1029    } else {
1030        ret = tap_fd_enable(s->fd);
1031        if (ret == 0) {
1032            s->enabled = true;
1033            tap_update_fd_handler(s);
1034        }
1035        return ret;
1036    }
1037}
1038
1039int tap_disable(NetClientState *nc)
1040{
1041    TAPState *s = DO_UPCAST(TAPState, nc, nc);
1042    int ret;
1043
1044    if (s->enabled == 0) {
1045        return 0;
1046    } else {
1047        ret = tap_fd_disable(s->fd);
1048        if (ret == 0) {
1049            qemu_purge_queued_packets(nc);
1050            s->enabled = false;
1051            tap_update_fd_handler(s);
1052        }
1053        return ret;
1054    }
1055}
1056