qemu/net/tap.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 * Copyright (c) 2009 Red Hat, Inc.
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "net/tap.h"
  27
  28#include "config-host.h"
  29
  30#include <sys/ioctl.h>
  31#include <sys/stat.h>
  32#include <sys/wait.h>
  33#include <sys/socket.h>
  34#include <net/if.h>
  35
  36#include "net.h"
  37#include "sysemu.h"
  38#include "qemu-char.h"
  39#include "qemu-common.h"
  40#include "qemu-error.h"
  41
  42#include "net/tap-linux.h"
  43
  44#include "hw/vhost_net.h"
  45
  46/* Maximum GSO packet size (64k) plus plenty of room for
  47 * the ethernet and virtio_net headers
  48 */
  49#define TAP_BUFSIZE (4096 + 65536)
  50
  51typedef struct TAPState {
  52    VLANClientState nc;
  53    int fd;
  54    char down_script[1024];
  55    char down_script_arg[128];
  56    uint8_t buf[TAP_BUFSIZE];
  57    unsigned int read_poll : 1;
  58    unsigned int write_poll : 1;
  59    unsigned int using_vnet_hdr : 1;
  60    unsigned int has_ufo: 1;
  61    VHostNetState *vhost_net;
  62    unsigned host_vnet_hdr_len;
  63} TAPState;
  64
  65static int launch_script(const char *setup_script, const char *ifname, int fd);
  66
  67static int tap_can_send(void *opaque);
  68static void tap_send(void *opaque);
  69static void tap_writable(void *opaque);
  70
  71static void tap_update_fd_handler(TAPState *s)
  72{
  73    qemu_set_fd_handler2(s->fd,
  74                         s->read_poll  ? tap_can_send : NULL,
  75                         s->read_poll  ? tap_send     : NULL,
  76                         s->write_poll ? tap_writable : NULL,
  77                         s);
  78}
  79
  80static void tap_read_poll(TAPState *s, int enable)
  81{
  82    s->read_poll = !!enable;
  83    tap_update_fd_handler(s);
  84}
  85
  86static void tap_write_poll(TAPState *s, int enable)
  87{
  88    s->write_poll = !!enable;
  89    tap_update_fd_handler(s);
  90}
  91
  92static void tap_writable(void *opaque)
  93{
  94    TAPState *s = opaque;
  95
  96    tap_write_poll(s, 0);
  97
  98    qemu_flush_queued_packets(&s->nc);
  99}
 100
 101static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
 102{
 103    ssize_t len;
 104
 105    do {
 106        len = writev(s->fd, iov, iovcnt);
 107    } while (len == -1 && errno == EINTR);
 108
 109    if (len == -1 && errno == EAGAIN) {
 110        tap_write_poll(s, 1);
 111        return 0;
 112    }
 113
 114    return len;
 115}
 116
 117static ssize_t tap_receive_iov(VLANClientState *nc, const struct iovec *iov,
 118                               int iovcnt)
 119{
 120    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 121    const struct iovec *iovp = iov;
 122    struct iovec iov_copy[iovcnt + 1];
 123    struct virtio_net_hdr_mrg_rxbuf hdr = { };
 124
 125    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 126        iov_copy[0].iov_base = &hdr;
 127        iov_copy[0].iov_len =  s->host_vnet_hdr_len;
 128        memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
 129        iovp = iov_copy;
 130        iovcnt++;
 131    }
 132
 133    return tap_write_packet(s, iovp, iovcnt);
 134}
 135
 136static ssize_t tap_receive_raw(VLANClientState *nc, const uint8_t *buf, size_t size)
 137{
 138    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 139    struct iovec iov[2];
 140    int iovcnt = 0;
 141    struct virtio_net_hdr_mrg_rxbuf hdr = { };
 142
 143    if (s->host_vnet_hdr_len) {
 144        iov[iovcnt].iov_base = &hdr;
 145        iov[iovcnt].iov_len  = s->host_vnet_hdr_len;
 146        iovcnt++;
 147    }
 148
 149    iov[iovcnt].iov_base = (char *)buf;
 150    iov[iovcnt].iov_len  = size;
 151    iovcnt++;
 152
 153    return tap_write_packet(s, iov, iovcnt);
 154}
 155
 156static ssize_t tap_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
 157{
 158    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 159    struct iovec iov[1];
 160
 161    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 162        return tap_receive_raw(nc, buf, size);
 163    }
 164
 165    iov[0].iov_base = (char *)buf;
 166    iov[0].iov_len  = size;
 167
 168    return tap_write_packet(s, iov, 1);
 169}
 170
 171static int tap_can_send(void *opaque)
 172{
 173    TAPState *s = opaque;
 174
 175    return qemu_can_send_packet(&s->nc);
 176}
 177
 178#ifndef __sun__
 179ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
 180{
 181    return read(tapfd, buf, maxlen);
 182}
 183#endif
 184
 185static void tap_send_completed(VLANClientState *nc, ssize_t len)
 186{
 187    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 188    tap_read_poll(s, 1);
 189}
 190
 191static void tap_send(void *opaque)
 192{
 193    TAPState *s = opaque;
 194    int size;
 195
 196    do {
 197        uint8_t *buf = s->buf;
 198
 199        size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
 200        if (size <= 0) {
 201            break;
 202        }
 203
 204        if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 205            buf  += s->host_vnet_hdr_len;
 206            size -= s->host_vnet_hdr_len;
 207        }
 208
 209        size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
 210        if (size == 0) {
 211            tap_read_poll(s, 0);
 212        }
 213    } while (size > 0 && qemu_can_send_packet(&s->nc));
 214}
 215
 216int tap_has_ufo(VLANClientState *nc)
 217{
 218    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 219
 220    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
 221
 222    return s->has_ufo;
 223}
 224
 225int tap_has_vnet_hdr(VLANClientState *nc)
 226{
 227    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 228
 229    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
 230
 231    return !!s->host_vnet_hdr_len;
 232}
 233
 234int tap_has_vnet_hdr_len(VLANClientState *nc, int len)
 235{
 236    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 237
 238    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
 239
 240    return tap_probe_vnet_hdr_len(s->fd, len);
 241}
 242
 243void tap_set_vnet_hdr_len(VLANClientState *nc, int len)
 244{
 245    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 246
 247    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
 248    assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
 249           len == sizeof(struct virtio_net_hdr));
 250
 251    tap_fd_set_vnet_hdr_len(s->fd, len);
 252    s->host_vnet_hdr_len = len;
 253}
 254
 255void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
 256{
 257    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 258
 259    using_vnet_hdr = using_vnet_hdr != 0;
 260
 261    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
 262    assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
 263
 264    s->using_vnet_hdr = using_vnet_hdr;
 265}
 266
 267void tap_set_offload(VLANClientState *nc, int csum, int tso4,
 268                     int tso6, int ecn, int ufo)
 269{
 270    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 271    if (s->fd < 0) {
 272        return;
 273    }
 274
 275    tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
 276}
 277
 278static void tap_cleanup(VLANClientState *nc)
 279{
 280    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 281
 282    if (s->vhost_net) {
 283        vhost_net_cleanup(s->vhost_net);
 284        s->vhost_net = NULL;
 285    }
 286
 287    qemu_purge_queued_packets(nc);
 288
 289    if (s->down_script[0])
 290        launch_script(s->down_script, s->down_script_arg, s->fd);
 291
 292    tap_read_poll(s, 0);
 293    tap_write_poll(s, 0);
 294    close(s->fd);
 295    s->fd = -1;
 296}
 297
 298static void tap_poll(VLANClientState *nc, bool enable)
 299{
 300    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 301    tap_read_poll(s, enable);
 302    tap_write_poll(s, enable);
 303}
 304
 305int tap_get_fd(VLANClientState *nc)
 306{
 307    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 308    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
 309    return s->fd;
 310}
 311
 312/* fd support */
 313
 314static NetClientInfo net_tap_info = {
 315    .type = NET_CLIENT_TYPE_TAP,
 316    .size = sizeof(TAPState),
 317    .receive = tap_receive,
 318    .receive_raw = tap_receive_raw,
 319    .receive_iov = tap_receive_iov,
 320    .poll = tap_poll,
 321    .cleanup = tap_cleanup,
 322};
 323
 324static TAPState *net_tap_fd_init(VLANState *vlan,
 325                                 const char *model,
 326                                 const char *name,
 327                                 int fd,
 328                                 int vnet_hdr)
 329{
 330    VLANClientState *nc;
 331    TAPState *s;
 332
 333    nc = qemu_new_net_client(&net_tap_info, vlan, NULL, model, name);
 334
 335    s = DO_UPCAST(TAPState, nc, nc);
 336
 337    s->fd = fd;
 338    s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
 339    s->using_vnet_hdr = 0;
 340    s->has_ufo = tap_probe_has_ufo(s->fd);
 341    tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
 342    tap_read_poll(s, 1);
 343    s->vhost_net = NULL;
 344    return s;
 345}
 346
 347static int launch_script(const char *setup_script, const char *ifname, int fd)
 348{
 349    sigset_t oldmask, mask;
 350    int pid, status;
 351    char *args[3];
 352    char **parg;
 353
 354    sigemptyset(&mask);
 355    sigaddset(&mask, SIGCHLD);
 356    sigprocmask(SIG_BLOCK, &mask, &oldmask);
 357
 358    /* try to launch network script */
 359    pid = fork();
 360    if (pid == 0) {
 361        int open_max = sysconf(_SC_OPEN_MAX), i;
 362
 363        for (i = 0; i < open_max; i++) {
 364            if (i != STDIN_FILENO &&
 365                i != STDOUT_FILENO &&
 366                i != STDERR_FILENO &&
 367                i != fd) {
 368                close(i);
 369            }
 370        }
 371        parg = args;
 372        *parg++ = (char *)setup_script;
 373        *parg++ = (char *)ifname;
 374        *parg = NULL;
 375        execv(setup_script, args);
 376        _exit(1);
 377    } else if (pid > 0) {
 378        while (waitpid(pid, &status, 0) != pid) {
 379            /* loop */
 380        }
 381        sigprocmask(SIG_SETMASK, &oldmask, NULL);
 382
 383        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
 384            return 0;
 385        }
 386    }
 387    fprintf(stderr, "%s: could not launch network script\n", setup_script);
 388    return -1;
 389}
 390
 391static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
 392{
 393    int fd, vnet_hdr_required;
 394    char ifname[128] = {0,};
 395    const char *setup_script;
 396
 397    if (qemu_opt_get(opts, "ifname")) {
 398        pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
 399    }
 400
 401    *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
 402    if (qemu_opt_get(opts, "vnet_hdr")) {
 403        vnet_hdr_required = *vnet_hdr;
 404    } else {
 405        vnet_hdr_required = 0;
 406    }
 407
 408    TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
 409    if (fd < 0) {
 410        return -1;
 411    }
 412
 413    setup_script = qemu_opt_get(opts, "script");
 414    if (setup_script &&
 415        setup_script[0] != '\0' &&
 416        strcmp(setup_script, "no") != 0 &&
 417        launch_script(setup_script, ifname, fd)) {
 418        close(fd);
 419        return -1;
 420    }
 421
 422    qemu_opt_set(opts, "ifname", ifname);
 423
 424    return fd;
 425}
 426
 427int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
 428{
 429    TAPState *s;
 430    int fd, vnet_hdr = 0;
 431
 432    if (qemu_opt_get(opts, "fd")) {
 433        if (qemu_opt_get(opts, "ifname") ||
 434            qemu_opt_get(opts, "script") ||
 435            qemu_opt_get(opts, "downscript") ||
 436            qemu_opt_get(opts, "vnet_hdr")) {
 437            error_report("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=");
 438            return -1;
 439        }
 440
 441        fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
 442        if (fd == -1) {
 443            return -1;
 444        }
 445
 446        fcntl(fd, F_SETFL, O_NONBLOCK);
 447
 448        vnet_hdr = tap_probe_vnet_hdr(fd);
 449    } else {
 450        if (!qemu_opt_get(opts, "script")) {
 451            qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
 452        }
 453
 454        if (!qemu_opt_get(opts, "downscript")) {
 455            qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
 456        }
 457
 458        fd = net_tap_init(opts, &vnet_hdr);
 459        if (fd == -1) {
 460            return -1;
 461        }
 462    }
 463
 464    s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
 465    if (!s) {
 466        close(fd);
 467        return -1;
 468    }
 469
 470    if (tap_set_sndbuf(s->fd, opts) < 0) {
 471        return -1;
 472    }
 473
 474    if (qemu_opt_get(opts, "fd")) {
 475        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
 476    } else {
 477        const char *ifname, *script, *downscript;
 478
 479        ifname     = qemu_opt_get(opts, "ifname");
 480        script     = qemu_opt_get(opts, "script");
 481        downscript = qemu_opt_get(opts, "downscript");
 482
 483        snprintf(s->nc.info_str, sizeof(s->nc.info_str),
 484                 "ifname=%s,script=%s,downscript=%s",
 485                 ifname, script, downscript);
 486
 487        if (strcmp(downscript, "no") != 0) {
 488            snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
 489            snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
 490        }
 491    }
 492
 493    if (qemu_opt_get_bool(opts, "vhost", !!qemu_opt_get(opts, "vhostfd") ||
 494                          qemu_opt_get_bool(opts, "vhostforce", false))) {
 495        int vhostfd, r;
 496        bool force = qemu_opt_get_bool(opts, "vhostforce", false);
 497        if (qemu_opt_get(opts, "vhostfd")) {
 498            r = net_handle_fd_param(mon, qemu_opt_get(opts, "vhostfd"));
 499            if (r == -1) {
 500                return -1;
 501            }
 502            vhostfd = r;
 503        } else {
 504            vhostfd = -1;
 505        }
 506        s->vhost_net = vhost_net_init(&s->nc, vhostfd, force);
 507        if (!s->vhost_net) {
 508            error_report("vhost-net requested but could not be initialized");
 509            return -1;
 510        }
 511    } else if (qemu_opt_get(opts, "vhostfd")) {
 512        error_report("vhostfd= is not valid without vhost");
 513        return -1;
 514    }
 515
 516    return 0;
 517}
 518
 519VHostNetState *tap_get_vhost_net(VLANClientState *nc)
 520{
 521    TAPState *s = DO_UPCAST(TAPState, nc, nc);
 522    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
 523    return s->vhost_net;
 524}
 525