qemu/net/tap.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 * Copyright (c) 2009 Red Hat, Inc.
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "tap_int.h"
  28
  29
  30#include <sys/ioctl.h>
  31#include <sys/wait.h>
  32#include <sys/socket.h>
  33#include <net/if.h>
  34
  35#include "net/eth.h"
  36#include "net/net.h"
  37#include "clients.h"
  38#include "monitor/monitor.h"
  39#include "sysemu/sysemu.h"
  40#include "qapi/error.h"
  41#include "qemu/cutils.h"
  42#include "qemu/error-report.h"
  43#include "qemu/main-loop.h"
  44#include "qemu/sockets.h"
  45
  46#include "net/tap.h"
  47
  48#include "net/vhost_net.h"
  49
/*
 * Per-queue state of a tap network backend.
 *
 * The embedded NetClientState must stay the first member: the rest of this
 * file converts between the two with DO_UPCAST(TAPState, nc, ...).
 */
typedef struct TAPState {
    NetClientState nc;
    int fd;                         /* tap descriptor; -1 after tap_cleanup() */
    char down_script[1024];         /* script run by tap_exit_notify(), "" if none */
    char down_script_arg[128];      /* argument (interface name) for down_script */
    uint8_t buf[NET_BUFSIZE];       /* receive buffer filled by tap_send() */
    bool read_poll;                 /* tap_send wanted as read handler */
    bool write_poll;                /* tap_writable wanted as write handler */
    bool using_vnet_hdr;            /* peer supplies/consumes the vnet header itself */
    bool has_ufo;                   /* result of tap_probe_has_ufo() at init */
    bool enabled;                   /* handlers are only installed while true */
    VHostNetState *vhost_net;       /* vhost backend, or NULL */
    unsigned host_vnet_hdr_len;     /* vnet header length on the host side, 0 if none */
    Notifier exit;                  /* exit notifier running tap_exit_notify() */
} TAPState;
  65
  66static void launch_script(const char *setup_script, const char *ifname,
  67                          int fd, Error **errp);
  68
  69static void tap_send(void *opaque);
  70static void tap_writable(void *opaque);
  71
  72static void tap_update_fd_handler(TAPState *s)
  73{
  74    qemu_set_fd_handler(s->fd,
  75                        s->read_poll && s->enabled ? tap_send : NULL,
  76                        s->write_poll && s->enabled ? tap_writable : NULL,
  77                        s);
  78}
  79
  80static void tap_read_poll(TAPState *s, bool enable)
  81{
  82    s->read_poll = enable;
  83    tap_update_fd_handler(s);
  84}
  85
  86static void tap_write_poll(TAPState *s, bool enable)
  87{
  88    s->write_poll = enable;
  89    tap_update_fd_handler(s);
  90}
  91
  92static void tap_writable(void *opaque)
  93{
  94    TAPState *s = opaque;
  95
  96    tap_write_poll(s, false);
  97
  98    qemu_flush_queued_packets(&s->nc);
  99}
 100
 101static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
 102{
 103    ssize_t len;
 104
 105    len = RETRY_ON_EINTR(writev(s->fd, iov, iovcnt));
 106
 107    if (len == -1 && errno == EAGAIN) {
 108        tap_write_poll(s, true);
 109        return 0;
 110    }
 111
 112    return len;
 113}
 114
 115static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
 116                               int iovcnt)
 117{
 118    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 119    const struct iovec *iovp = iov;
 120    struct iovec iov_copy[iovcnt + 1];
 121    struct virtio_net_hdr_mrg_rxbuf hdr = { };
 122
 123    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 124        iov_copy[0].iov_base = &hdr;
 125        iov_copy[0].iov_len =  s->host_vnet_hdr_len;
 126        memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
 127        iovp = iov_copy;
 128        iovcnt++;
 129    }
 130
 131    return tap_write_packet(s, iovp, iovcnt);
 132}
 133
 134static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size)
 135{
 136    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 137    struct iovec iov[2];
 138    int iovcnt = 0;
 139    struct virtio_net_hdr_mrg_rxbuf hdr = { };
 140
 141    if (s->host_vnet_hdr_len) {
 142        iov[iovcnt].iov_base = &hdr;
 143        iov[iovcnt].iov_len  = s->host_vnet_hdr_len;
 144        iovcnt++;
 145    }
 146
 147    iov[iovcnt].iov_base = (char *)buf;
 148    iov[iovcnt].iov_len  = size;
 149    iovcnt++;
 150
 151    return tap_write_packet(s, iov, iovcnt);
 152}
 153
 154static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 155{
 156    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 157    struct iovec iov[1];
 158
 159    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 160        return tap_receive_raw(nc, buf, size);
 161    }
 162
 163    iov[0].iov_base = (char *)buf;
 164    iov[0].iov_len  = size;
 165
 166    return tap_write_packet(s, iov, 1);
 167}
 168
 169#ifndef __sun__
 170ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
 171{
 172    return read(tapfd, buf, maxlen);
 173}
 174#endif
 175
 176static void tap_send_completed(NetClientState *nc, ssize_t len)
 177{
 178    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 179    tap_read_poll(s, true);
 180}
 181
 182static void tap_send(void *opaque)
 183{
 184    TAPState *s = opaque;
 185    int size;
 186    int packets = 0;
 187
 188    while (true) {
 189        uint8_t *buf = s->buf;
 190        uint8_t min_pkt[ETH_ZLEN];
 191        size_t min_pktsz = sizeof(min_pkt);
 192
 193        size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
 194        if (size <= 0) {
 195            break;
 196        }
 197
 198        if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 199            buf  += s->host_vnet_hdr_len;
 200            size -= s->host_vnet_hdr_len;
 201        }
 202
 203        if (net_peer_needs_padding(&s->nc)) {
 204            if (eth_pad_short_frame(min_pkt, &min_pktsz, buf, size)) {
 205                buf = min_pkt;
 206                size = min_pktsz;
 207            }
 208        }
 209
 210        size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
 211        if (size == 0) {
 212            tap_read_poll(s, false);
 213            break;
 214        } else if (size < 0) {
 215            break;
 216        }
 217
 218        /*
 219         * When the host keeps receiving more packets while tap_send() is
 220         * running we can hog the QEMU global mutex.  Limit the number of
 221         * packets that are processed per tap_send() callback to prevent
 222         * stalling the guest.
 223         */
 224        packets++;
 225        if (packets >= 50) {
 226            break;
 227        }
 228    }
 229}
 230
 231static bool tap_has_ufo(NetClientState *nc)
 232{
 233    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 234
 235    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 236
 237    return s->has_ufo;
 238}
 239
 240static bool tap_has_vnet_hdr(NetClientState *nc)
 241{
 242    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 243
 244    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 245
 246    return !!s->host_vnet_hdr_len;
 247}
 248
 249static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
 250{
 251    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 252
 253    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 254
 255    return !!tap_probe_vnet_hdr_len(s->fd, len);
 256}
 257
 258static int tap_get_vnet_hdr_len(NetClientState *nc)
 259{
 260    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 261
 262    return s->host_vnet_hdr_len;
 263}
 264
 265static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
 266{
 267    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 268
 269    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 270    assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
 271           len == sizeof(struct virtio_net_hdr) ||
 272           len == sizeof(struct virtio_net_hdr_v1_hash));
 273
 274    tap_fd_set_vnet_hdr_len(s->fd, len);
 275    s->host_vnet_hdr_len = len;
 276}
 277
 278static bool tap_get_using_vnet_hdr(NetClientState *nc)
 279{
 280    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 281
 282    return s->using_vnet_hdr;
 283}
 284
 285static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
 286{
 287    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 288
 289    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 290    assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
 291
 292    s->using_vnet_hdr = using_vnet_hdr;
 293}
 294
 295static int tap_set_vnet_le(NetClientState *nc, bool is_le)
 296{
 297    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 298
 299    return tap_fd_set_vnet_le(s->fd, is_le);
 300}
 301
 302static int tap_set_vnet_be(NetClientState *nc, bool is_be)
 303{
 304    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 305
 306    return tap_fd_set_vnet_be(s->fd, is_be);
 307}
 308
 309static void tap_set_offload(NetClientState *nc, int csum, int tso4,
 310                     int tso6, int ecn, int ufo)
 311{
 312    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 313    if (s->fd < 0) {
 314        return;
 315    }
 316
 317    tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
 318}
 319
 320static void tap_exit_notify(Notifier *notifier, void *data)
 321{
 322    TAPState *s = container_of(notifier, TAPState, exit);
 323    Error *err = NULL;
 324
 325    if (s->down_script[0]) {
 326        launch_script(s->down_script, s->down_script_arg, s->fd, &err);
 327        if (err) {
 328            error_report_err(err);
 329        }
 330    }
 331}
 332
 333static void tap_cleanup(NetClientState *nc)
 334{
 335    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 336
 337    if (s->vhost_net) {
 338        vhost_net_cleanup(s->vhost_net);
 339        g_free(s->vhost_net);
 340        s->vhost_net = NULL;
 341    }
 342
 343    qemu_purge_queued_packets(nc);
 344
 345    tap_exit_notify(&s->exit, NULL);
 346    qemu_remove_exit_notifier(&s->exit);
 347
 348    tap_read_poll(s, false);
 349    tap_write_poll(s, false);
 350    close(s->fd);
 351    s->fd = -1;
 352}
 353
 354static void tap_poll(NetClientState *nc, bool enable)
 355{
 356    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 357    tap_read_poll(s, enable);
 358    tap_write_poll(s, enable);
 359}
 360
 361static bool tap_set_steering_ebpf(NetClientState *nc, int prog_fd)
 362{
 363    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 364    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 365
 366    return tap_fd_set_steering_ebpf(s->fd, prog_fd) == 0;
 367}
 368
 369int tap_get_fd(NetClientState *nc)
 370{
 371    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 372    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 373    return s->fd;
 374}
 375
 376/* fd support */
 377
/*
 * Callback table for tap net clients.  .size is sizeof(TAPState), the
 * structure that embeds NetClientState and that net_tap_fd_init() obtains
 * from qemu_new_net_client().
 */
static NetClientInfo net_tap_info = {
    .type = NET_CLIENT_DRIVER_TAP,
    .size = sizeof(TAPState),
    .receive = tap_receive,
    .receive_raw = tap_receive_raw,
    .receive_iov = tap_receive_iov,
    .poll = tap_poll,
    .cleanup = tap_cleanup,
    .has_ufo = tap_has_ufo,
    .has_vnet_hdr = tap_has_vnet_hdr,
    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
    .get_using_vnet_hdr = tap_get_using_vnet_hdr,
    .using_vnet_hdr = tap_using_vnet_hdr,
    .set_offload = tap_set_offload,
    .get_vnet_hdr_len = tap_get_vnet_hdr_len,
    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
    .set_vnet_le = tap_set_vnet_le,
    .set_vnet_be = tap_set_vnet_be,
    .set_steering_ebpf = tap_set_steering_ebpf,
};
 398
 399static TAPState *net_tap_fd_init(NetClientState *peer,
 400                                 const char *model,
 401                                 const char *name,
 402                                 int fd,
 403                                 int vnet_hdr)
 404{
 405    NetClientState *nc;
 406    TAPState *s;
 407
 408    nc = qemu_new_net_client(&net_tap_info, peer, model, name);
 409
 410    s = DO_UPCAST(TAPState, nc, nc);
 411
 412    s->fd = fd;
 413    s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
 414    s->using_vnet_hdr = false;
 415    s->has_ufo = tap_probe_has_ufo(s->fd);
 416    s->enabled = true;
 417    tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
 418    /*
 419     * Make sure host header length is set correctly in tap:
 420     * it might have been modified by another instance of qemu.
 421     */
 422    if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) {
 423        tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len);
 424    }
 425    tap_read_poll(s, true);
 426    s->vhost_net = NULL;
 427
 428    s->exit.notify = tap_exit_notify;
 429    qemu_add_exit_notifier(&s->exit);
 430
 431    return s;
 432}
 433
 434static void launch_script(const char *setup_script, const char *ifname,
 435                          int fd, Error **errp)
 436{
 437    int pid, status;
 438    char *args[3];
 439    char **parg;
 440
 441    /* try to launch network script */
 442    pid = fork();
 443    if (pid < 0) {
 444        error_setg_errno(errp, errno, "could not launch network script %s",
 445                         setup_script);
 446        return;
 447    }
 448    if (pid == 0) {
 449        int open_max = sysconf(_SC_OPEN_MAX), i;
 450
 451        for (i = 3; i < open_max; i++) {
 452            if (i != fd) {
 453                close(i);
 454            }
 455        }
 456        parg = args;
 457        *parg++ = (char *)setup_script;
 458        *parg++ = (char *)ifname;
 459        *parg = NULL;
 460        execv(setup_script, args);
 461        _exit(1);
 462    } else {
 463        while (waitpid(pid, &status, 0) != pid) {
 464            /* loop */
 465        }
 466
 467        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
 468            return;
 469        }
 470        error_setg(errp, "network script %s failed with status %d",
 471                   setup_script, status);
 472    }
 473}
 474
 475static int recv_fd(int c)
 476{
 477    int fd;
 478    uint8_t msgbuf[CMSG_SPACE(sizeof(fd))];
 479    struct msghdr msg = {
 480        .msg_control = msgbuf,
 481        .msg_controllen = sizeof(msgbuf),
 482    };
 483    struct cmsghdr *cmsg;
 484    struct iovec iov;
 485    uint8_t req[1];
 486    ssize_t len;
 487
 488    cmsg = CMSG_FIRSTHDR(&msg);
 489    cmsg->cmsg_level = SOL_SOCKET;
 490    cmsg->cmsg_type = SCM_RIGHTS;
 491    cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
 492    msg.msg_controllen = cmsg->cmsg_len;
 493
 494    iov.iov_base = req;
 495    iov.iov_len = sizeof(req);
 496
 497    msg.msg_iov = &iov;
 498    msg.msg_iovlen = 1;
 499
 500    len = recvmsg(c, &msg, 0);
 501    if (len > 0) {
 502        memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
 503        return fd;
 504    }
 505
 506    return len;
 507}
 508
 509static int net_bridge_run_helper(const char *helper, const char *bridge,
 510                                 Error **errp)
 511{
 512    sigset_t oldmask, mask;
 513    g_autofree char *default_helper = NULL;
 514    int pid, status;
 515    char *args[5];
 516    char **parg;
 517    int sv[2];
 518
 519    sigemptyset(&mask);
 520    sigaddset(&mask, SIGCHLD);
 521    sigprocmask(SIG_BLOCK, &mask, &oldmask);
 522
 523    if (!helper) {
 524        helper = default_helper = get_relocated_path(DEFAULT_BRIDGE_HELPER);
 525    }
 526
 527    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
 528        error_setg_errno(errp, errno, "socketpair() failed");
 529        return -1;
 530    }
 531
 532    /* try to launch bridge helper */
 533    pid = fork();
 534    if (pid < 0) {
 535        error_setg_errno(errp, errno, "Can't fork bridge helper");
 536        return -1;
 537    }
 538    if (pid == 0) {
 539        int open_max = sysconf(_SC_OPEN_MAX), i;
 540        char *fd_buf = NULL;
 541        char *br_buf = NULL;
 542        char *helper_cmd = NULL;
 543
 544        for (i = 3; i < open_max; i++) {
 545            if (i != sv[1]) {
 546                close(i);
 547            }
 548        }
 549
 550        fd_buf = g_strdup_printf("%s%d", "--fd=", sv[1]);
 551
 552        if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
 553            /* assume helper is a command */
 554
 555            if (strstr(helper, "--br=") == NULL) {
 556                br_buf = g_strdup_printf("%s%s", "--br=", bridge);
 557            }
 558
 559            helper_cmd = g_strdup_printf("%s %s %s %s", helper,
 560                            "--use-vnet", fd_buf, br_buf ? br_buf : "");
 561
 562            parg = args;
 563            *parg++ = (char *)"sh";
 564            *parg++ = (char *)"-c";
 565            *parg++ = helper_cmd;
 566            *parg++ = NULL;
 567
 568            execv("/bin/sh", args);
 569            g_free(helper_cmd);
 570        } else {
 571            /* assume helper is just the executable path name */
 572
 573            br_buf = g_strdup_printf("%s%s", "--br=", bridge);
 574
 575            parg = args;
 576            *parg++ = (char *)helper;
 577            *parg++ = (char *)"--use-vnet";
 578            *parg++ = fd_buf;
 579            *parg++ = br_buf;
 580            *parg++ = NULL;
 581
 582            execv(helper, args);
 583        }
 584        g_free(fd_buf);
 585        g_free(br_buf);
 586        _exit(1);
 587
 588    } else {
 589        int fd;
 590        int saved_errno;
 591
 592        close(sv[1]);
 593
 594        fd = RETRY_ON_EINTR(recv_fd(sv[0]));
 595        saved_errno = errno;
 596
 597        close(sv[0]);
 598
 599        while (waitpid(pid, &status, 0) != pid) {
 600            /* loop */
 601        }
 602        sigprocmask(SIG_SETMASK, &oldmask, NULL);
 603        if (fd < 0) {
 604            error_setg_errno(errp, saved_errno,
 605                             "failed to recv file descriptor");
 606            return -1;
 607        }
 608        if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
 609            error_setg(errp, "bridge helper failed");
 610            return -1;
 611        }
 612        return fd;
 613    }
 614}
 615
 616int net_init_bridge(const Netdev *netdev, const char *name,
 617                    NetClientState *peer, Error **errp)
 618{
 619    const NetdevBridgeOptions *bridge;
 620    const char *helper, *br;
 621    TAPState *s;
 622    int fd, vnet_hdr;
 623
 624    assert(netdev->type == NET_CLIENT_DRIVER_BRIDGE);
 625    bridge = &netdev->u.bridge;
 626    helper = bridge->helper;
 627    br     = bridge->br ?: DEFAULT_BRIDGE_INTERFACE;
 628
 629    fd = net_bridge_run_helper(helper, br, errp);
 630    if (fd == -1) {
 631        return -1;
 632    }
 633
 634    if (!g_unix_set_fd_nonblocking(fd, true, NULL)) {
 635        error_setg_errno(errp, errno, "Failed to set FD nonblocking");
 636        return -1;
 637    }
 638    vnet_hdr = tap_probe_vnet_hdr(fd, errp);
 639    if (vnet_hdr < 0) {
 640        close(fd);
 641        return -1;
 642    }
 643    s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
 644
 645    qemu_set_info_str(&s->nc, "helper=%s,br=%s", helper, br);
 646
 647    return 0;
 648}
 649
 650static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
 651                        const char *setup_script, char *ifname,
 652                        size_t ifname_sz, int mq_required, Error **errp)
 653{
 654    Error *err = NULL;
 655    int fd, vnet_hdr_required;
 656
 657    if (tap->has_vnet_hdr) {
 658        *vnet_hdr = tap->vnet_hdr;
 659        vnet_hdr_required = *vnet_hdr;
 660    } else {
 661        *vnet_hdr = 1;
 662        vnet_hdr_required = 0;
 663    }
 664
 665    fd = RETRY_ON_EINTR(tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required,
 666                      mq_required, errp));
 667    if (fd < 0) {
 668        return -1;
 669    }
 670
 671    if (setup_script &&
 672        setup_script[0] != '\0' &&
 673        strcmp(setup_script, "no") != 0) {
 674        launch_script(setup_script, ifname, fd, &err);
 675        if (err) {
 676            error_propagate(errp, err);
 677            close(fd);
 678            return -1;
 679        }
 680    }
 681
 682    return fd;
 683}
 684
 685#define MAX_TAP_QUEUES 1024
 686
 687static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
 688                             const char *model, const char *name,
 689                             const char *ifname, const char *script,
 690                             const char *downscript, const char *vhostfdname,
 691                             int vnet_hdr, int fd, Error **errp)
 692{
 693    Error *err = NULL;
 694    TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
 695    int vhostfd;
 696
 697    tap_set_sndbuf(s->fd, tap, &err);
 698    if (err) {
 699        error_propagate(errp, err);
 700        goto failed;
 701    }
 702
 703    if (tap->fd || tap->fds) {
 704        qemu_set_info_str(&s->nc, "fd=%d", fd);
 705    } else if (tap->helper) {
 706        qemu_set_info_str(&s->nc, "helper=%s", tap->helper);
 707    } else {
 708        qemu_set_info_str(&s->nc, "ifname=%s,script=%s,downscript=%s", ifname,
 709                          script, downscript);
 710
 711        if (strcmp(downscript, "no") != 0) {
 712            snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
 713            snprintf(s->down_script_arg, sizeof(s->down_script_arg),
 714                     "%s", ifname);
 715        }
 716    }
 717
 718    if (tap->has_vhost ? tap->vhost :
 719        vhostfdname || (tap->has_vhostforce && tap->vhostforce)) {
 720        VhostNetOptions options;
 721
 722        options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
 723        options.net_backend = &s->nc;
 724        if (tap->has_poll_us) {
 725            options.busyloop_timeout = tap->poll_us;
 726        } else {
 727            options.busyloop_timeout = 0;
 728        }
 729
 730        if (vhostfdname) {
 731            vhostfd = monitor_fd_param(monitor_cur(), vhostfdname, &err);
 732            if (vhostfd == -1) {
 733                if (tap->has_vhostforce && tap->vhostforce) {
 734                    error_propagate(errp, err);
 735                } else {
 736                    warn_report_err(err);
 737                }
 738                goto failed;
 739            }
 740            if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) {
 741                error_setg_errno(errp, errno, "%s: Can't use file descriptor %d",
 742                                 name, fd);
 743                goto failed;
 744            }
 745        } else {
 746            vhostfd = open("/dev/vhost-net", O_RDWR);
 747            if (vhostfd < 0) {
 748                if (tap->has_vhostforce && tap->vhostforce) {
 749                    error_setg_errno(errp, errno,
 750                                     "tap: open vhost char device failed");
 751                } else {
 752                    warn_report("tap: open vhost char device failed: %s",
 753                                strerror(errno));
 754                }
 755                goto failed;
 756            }
 757            if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) {
 758                error_setg_errno(errp, errno, "Failed to set FD nonblocking");
 759                goto failed;
 760            }
 761        }
 762        options.opaque = (void *)(uintptr_t)vhostfd;
 763        options.nvqs = 2;
 764
 765        s->vhost_net = vhost_net_init(&options);
 766        if (!s->vhost_net) {
 767            if (tap->has_vhostforce && tap->vhostforce) {
 768                error_setg(errp, VHOST_NET_INIT_FAILED);
 769            } else {
 770                warn_report(VHOST_NET_INIT_FAILED);
 771            }
 772            goto failed;
 773        }
 774    } else if (vhostfdname) {
 775        error_setg(errp, "vhostfd(s)= is not valid without vhost");
 776        goto failed;
 777    }
 778
 779    return;
 780
 781failed:
 782    qemu_del_net_client(&s->nc);
 783}
 784
 785static int get_fds(char *str, char *fds[], int max)
 786{
 787    char *ptr = str, *this;
 788    size_t len = strlen(str);
 789    int i = 0;
 790
 791    while (i < max && ptr < str + len) {
 792        this = strchr(ptr, ':');
 793
 794        if (this == NULL) {
 795            fds[i] = g_strdup(ptr);
 796        } else {
 797            fds[i] = g_strndup(ptr, this - ptr);
 798        }
 799
 800        i++;
 801        if (this == NULL) {
 802            break;
 803        } else {
 804            ptr = this + 1;
 805        }
 806    }
 807
 808    return i;
 809}
 810
 811int net_init_tap(const Netdev *netdev, const char *name,
 812                 NetClientState *peer, Error **errp)
 813{
 814    const NetdevTapOptions *tap;
 815    int fd, vnet_hdr = 0, i = 0, queues;
 816    /* for the no-fd, no-helper case */
 817    const char *script;
 818    const char *downscript;
 819    Error *err = NULL;
 820    const char *vhostfdname;
 821    char ifname[128];
 822    int ret = 0;
 823
 824    assert(netdev->type == NET_CLIENT_DRIVER_TAP);
 825    tap = &netdev->u.tap;
 826    queues = tap->has_queues ? tap->queues : 1;
 827    vhostfdname = tap->vhostfd;
 828    script = tap->script;
 829    downscript = tap->downscript;
 830
 831    /* QEMU hubs do not support multiqueue tap, in this case peer is set.
 832     * For -netdev, peer is always NULL. */
 833    if (peer && (tap->has_queues || tap->fds || tap->vhostfds)) {
 834        error_setg(errp, "Multiqueue tap cannot be used with hubs");
 835        return -1;
 836    }
 837
 838    if (tap->fd) {
 839        if (tap->ifname || tap->script || tap->downscript ||
 840            tap->has_vnet_hdr || tap->helper || tap->has_queues ||
 841            tap->fds || tap->vhostfds) {
 842            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
 843                       "helper=, queues=, fds=, and vhostfds= "
 844                       "are invalid with fd=");
 845            return -1;
 846        }
 847
 848        fd = monitor_fd_param(monitor_cur(), tap->fd, errp);
 849        if (fd == -1) {
 850            return -1;
 851        }
 852
 853        if (!g_unix_set_fd_nonblocking(fd, true, NULL)) {
 854            error_setg_errno(errp, errno, "%s: Can't use file descriptor %d",
 855                             name, fd);
 856            close(fd);
 857            return -1;
 858        }
 859
 860        vnet_hdr = tap_probe_vnet_hdr(fd, errp);
 861        if (vnet_hdr < 0) {
 862            close(fd);
 863            return -1;
 864        }
 865
 866        net_init_tap_one(tap, peer, "tap", name, NULL,
 867                         script, downscript,
 868                         vhostfdname, vnet_hdr, fd, &err);
 869        if (err) {
 870            error_propagate(errp, err);
 871            close(fd);
 872            return -1;
 873        }
 874    } else if (tap->fds) {
 875        char **fds;
 876        char **vhost_fds;
 877        int nfds = 0, nvhosts = 0;
 878
 879        if (tap->ifname || tap->script || tap->downscript ||
 880            tap->has_vnet_hdr || tap->helper || tap->has_queues ||
 881            tap->vhostfd) {
 882            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
 883                       "helper=, queues=, and vhostfd= "
 884                       "are invalid with fds=");
 885            return -1;
 886        }
 887
 888        fds = g_new0(char *, MAX_TAP_QUEUES);
 889        vhost_fds = g_new0(char *, MAX_TAP_QUEUES);
 890
 891        nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES);
 892        if (tap->vhostfds) {
 893            nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES);
 894            if (nfds != nvhosts) {
 895                error_setg(errp, "The number of fds passed does not match "
 896                           "the number of vhostfds passed");
 897                ret = -1;
 898                goto free_fail;
 899            }
 900        }
 901
 902        for (i = 0; i < nfds; i++) {
 903            fd = monitor_fd_param(monitor_cur(), fds[i], errp);
 904            if (fd == -1) {
 905                ret = -1;
 906                goto free_fail;
 907            }
 908
 909            ret = g_unix_set_fd_nonblocking(fd, true, NULL);
 910            if (!ret) {
 911                error_setg_errno(errp, errno, "%s: Can't use file descriptor %d",
 912                                 name, fd);
 913                goto free_fail;
 914            }
 915
 916            if (i == 0) {
 917                vnet_hdr = tap_probe_vnet_hdr(fd, errp);
 918                if (vnet_hdr < 0) {
 919                    ret = -1;
 920                    goto free_fail;
 921                }
 922            } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) {
 923                error_setg(errp,
 924                           "vnet_hdr not consistent across given tap fds");
 925                ret = -1;
 926                goto free_fail;
 927            }
 928
 929            net_init_tap_one(tap, peer, "tap", name, ifname,
 930                             script, downscript,
 931                             tap->vhostfds ? vhost_fds[i] : NULL,
 932                             vnet_hdr, fd, &err);
 933            if (err) {
 934                error_propagate(errp, err);
 935                ret = -1;
 936                goto free_fail;
 937            }
 938        }
 939
 940free_fail:
 941        for (i = 0; i < nvhosts; i++) {
 942            g_free(vhost_fds[i]);
 943        }
 944        for (i = 0; i < nfds; i++) {
 945            g_free(fds[i]);
 946        }
 947        g_free(fds);
 948        g_free(vhost_fds);
 949        return ret;
 950    } else if (tap->helper) {
 951        if (tap->ifname || tap->script || tap->downscript ||
 952            tap->has_vnet_hdr || tap->has_queues || tap->vhostfds) {
 953            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
 954                       "queues=, and vhostfds= are invalid with helper=");
 955            return -1;
 956        }
 957
 958        fd = net_bridge_run_helper(tap->helper,
 959                                   tap->br ?: DEFAULT_BRIDGE_INTERFACE,
 960                                   errp);
 961        if (fd == -1) {
 962            return -1;
 963        }
 964
 965        if (!g_unix_set_fd_nonblocking(fd, true, NULL)) {
 966            error_setg_errno(errp, errno, "Failed to set FD nonblocking");
 967            return -1;
 968        }
 969        vnet_hdr = tap_probe_vnet_hdr(fd, errp);
 970        if (vnet_hdr < 0) {
 971            close(fd);
 972            return -1;
 973        }
 974
 975        net_init_tap_one(tap, peer, "bridge", name, ifname,
 976                         script, downscript, vhostfdname,
 977                         vnet_hdr, fd, &err);
 978        if (err) {
 979            error_propagate(errp, err);
 980            close(fd);
 981            return -1;
 982        }
 983    } else {
 984        g_autofree char *default_script = NULL;
 985        g_autofree char *default_downscript = NULL;
 986        if (tap->vhostfds) {
 987            error_setg(errp, "vhostfds= is invalid if fds= wasn't specified");
 988            return -1;
 989        }
 990
 991        if (!script) {
 992            script = default_script = get_relocated_path(DEFAULT_NETWORK_SCRIPT);
 993        }
 994        if (!downscript) {
 995            downscript = default_downscript =
 996                                 get_relocated_path(DEFAULT_NETWORK_DOWN_SCRIPT);
 997        }
 998
 999        if (tap->ifname) {
1000            pstrcpy(ifname, sizeof ifname, tap->ifname);
1001        } else {
1002            ifname[0] = '\0';
1003        }
1004
1005        for (i = 0; i < queues; i++) {
1006            fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
1007                              ifname, sizeof ifname, queues > 1, errp);
1008            if (fd == -1) {
1009                return -1;
1010            }
1011
1012            if (queues > 1 && i == 0 && !tap->ifname) {
1013                if (tap_fd_get_ifname(fd, ifname)) {
1014                    error_setg(errp, "Fail to get ifname");
1015                    close(fd);
1016                    return -1;
1017                }
1018            }
1019
1020            net_init_tap_one(tap, peer, "tap", name, ifname,
1021                             i >= 1 ? "no" : script,
1022                             i >= 1 ? "no" : downscript,
1023                             vhostfdname, vnet_hdr, fd, &err);
1024            if (err) {
1025                error_propagate(errp, err);
1026                close(fd);
1027                return -1;
1028            }
1029        }
1030    }
1031
1032    return 0;
1033}
1034
1035VHostNetState *tap_get_vhost_net(NetClientState *nc)
1036{
1037    TAPState *s = DO_UPCAST(TAPState, nc, nc);
1038    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
1039    return s->vhost_net;
1040}
1041
1042int tap_enable(NetClientState *nc)
1043{
1044    TAPState *s = DO_UPCAST(TAPState, nc, nc);
1045    int ret;
1046
1047    if (s->enabled) {
1048        return 0;
1049    } else {
1050        ret = tap_fd_enable(s->fd);
1051        if (ret == 0) {
1052            s->enabled = true;
1053            tap_update_fd_handler(s);
1054        }
1055        return ret;
1056    }
1057}
1058
1059int tap_disable(NetClientState *nc)
1060{
1061    TAPState *s = DO_UPCAST(TAPState, nc, nc);
1062    int ret;
1063
1064    if (s->enabled == 0) {
1065        return 0;
1066    } else {
1067        ret = tap_fd_disable(s->fd);
1068        if (ret == 0) {
1069            qemu_purge_queued_packets(nc);
1070            s->enabled = false;
1071            tap_update_fd_handler(s);
1072        }
1073        return ret;
1074    }
1075}
1076