qemu/net/netmap.c
<<
>>
Prefs
   1/*
   2 * netmap access for qemu
   3 *
   4 * Copyright (c) 2012-2013 Luigi Rizzo
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25
  26#include <sys/ioctl.h>
  27#include <net/if.h>
  28#include <sys/mman.h>
  29#include <stdint.h>
  30#include <stdio.h>
  31#define NETMAP_WITH_LIBS
  32#include <net/netmap.h>
  33#include <net/netmap_user.h>
  34
  35#include "net/net.h"
  36#include "net/tap.h"
  37#include "clients.h"
  38#include "sysemu/sysemu.h"
  39#include "qemu/error-report.h"
  40#include "qemu/iov.h"
  41
/* Private netmap device info: an open descriptor on the netmap device
 * node plus pointers into the shared-memory region exported by the
 * kernel for the registered port. */
typedef struct NetmapPriv {
    int                 fd;       /* Fd from open(fdname), bound via NIOCREGIF. */
    size_t              memsize;  /* Size of the mmap()ed shared region. */
    void                *mem;     /* Base of the netmap shared memory. */
    struct netmap_if    *nifp;    /* Interface descriptor inside 'mem'. */
    struct netmap_ring  *rx;      /* RX ring #0 (the only ring used). */
    struct netmap_ring  *tx;      /* TX ring #0 (the only ring used). */
    char                fdname[PATH_MAX];        /* Normally "/dev/netmap". */
    char                ifname[IFNAMSIZ];        /* Port name passed to NIOCREGIF. */
} NetmapPriv;
  53
/* Per-backend state. Embeds the generic NetClientState as the first
 * member so DO_UPCAST(NetmapState, nc, nc) can recover it from the
 * NetClientState pointer handed to the callbacks. */
typedef struct NetmapState {
    NetClientState      nc;            /* Generic backend state; keep first. */
    NetmapPriv          me;            /* Netmap device handle and rings. */
    bool                read_poll;     /* Fd currently polled for readability (RX). */
    bool                write_poll;    /* Fd currently polled for writability (TX full). */
    struct iovec        iov[IOV_MAX];  /* Scratch iovec array used by netmap_send(). */
    int                 vnet_hdr_len;  /* Current virtio-net header length. */
} NetmapState;
  62
/* On non-FreeBSD hosts pkt_copy is just bcopy(); the unrolled copy
 * below is only built on FreeBSD. */
#ifndef __FreeBSD__
#define pkt_copy bcopy
#else
/* A fast copy routine only for multiples of 64 bytes, non overlapped. */
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
    const uint64_t *src = _src;
    uint64_t *dst = _dst;
    /* For large packets the plain bcopy() is at least as fast. */
    if (unlikely(l >= 1024)) {
        bcopy(src, dst, l);
        return;
    }
    /* Copy 64 bytes (eight uint64_t) per iteration.  NOTE(review):
     * this rounds l up to a multiple of 64, so it can read and write
     * up to 63 bytes past l -- presumably safe because netmap buffers
     * are allocated in fixed-size, aligned slots; confirm against the
     * netmap buffer layout. */
    for (; l > 0; l -= 64) {
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
    }
}
#endif /* __FreeBSD__ */
  88
/*
 * Open a netmap device. We assume there is only one queue
 * (which is the case for the VALE bridge).
 *
 * On success fills in me->fd, memsize, mem, nifp, tx and rx, and
 * returns 0.  On failure reports the error, closes the fd (if it was
 * opened) and returns -1.
 */
static int netmap_open(NetmapPriv *me)
{
    int fd;
    int err;
    size_t l;
    struct nmreq req;

    me->fd = fd = open(me->fdname, O_RDWR);
    if (fd < 0) {
        error_report("Unable to open netmap device '%s' (%s)",
                        me->fdname, strerror(errno));
        return -1;
    }
    /* Bind the fd to the port named me->ifname.  NETMAP_NO_TX_POLL
     * asks the kernel not to flush the TX ring on poll(); transmission
     * is triggered explicitly with NIOCTXSYNC (see the TX paths). */
    memset(&req, 0, sizeof(req));
    pstrcpy(req.nr_name, sizeof(req.nr_name), me->ifname);
    req.nr_ringid = NETMAP_NO_TX_POLL;
    req.nr_version = NETMAP_API;
    err = ioctl(fd, NIOCREGIF, &req);
    if (err) {
        error_report("Unable to register %s: %s", me->ifname, strerror(errno));
        goto error;
    }
    l = me->memsize = req.nr_memsize;

    /* Map the shared region that contains the rings and buffers. */
    me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
    if (me->mem == MAP_FAILED) {
        error_report("Unable to mmap netmap shared memory: %s",
                        strerror(errno));
        me->mem = NULL;
        goto error;
    }

    /* Locate the interface descriptor and the first TX/RX rings. */
    me->nifp = NETMAP_IF(me->mem, req.nr_offset);
    me->tx = NETMAP_TXRING(me->nifp, 0);
    me->rx = NETMAP_RXRING(me->nifp, 0);
    return 0;

error:
    close(me->fd);
    return -1;
}
 134
/* Tell the event-loop if the netmap backend can send packets
   to the frontend.  Used as the fd_read_poll predicate: while it
   returns 0, the event loop will not invoke netmap_send(). */
static int netmap_can_send(void *opaque)
{
    NetmapState *s = opaque;

    return qemu_can_send_packet(&s->nc);
}
 143
 144static void netmap_send(void *opaque);
 145static void netmap_writable(void *opaque);
 146
/* Set the event-loop handlers for the netmap backend.
 *
 * read_poll drives both the can-send predicate and the read callback
 * (netmap RX ring --> frontend); write_poll drives the write callback,
 * armed only while the TX ring is full.  A NULL argument unregisters
 * the corresponding handler. */
static void netmap_update_fd_handler(NetmapState *s)
{
    qemu_set_fd_handler2(s->me.fd,
                         s->read_poll  ? netmap_can_send : NULL,
                         s->read_poll  ? netmap_send     : NULL,
                         s->write_poll ? netmap_writable : NULL,
                         s);
}
 156
 157/* Update the read handler. */
 158static void netmap_read_poll(NetmapState *s, bool enable)
 159{
 160    if (s->read_poll != enable) { /* Do nothing if not changed. */
 161        s->read_poll = enable;
 162        netmap_update_fd_handler(s);
 163    }
 164}
 165
 166/* Update the write handler. */
 167static void netmap_write_poll(NetmapState *s, bool enable)
 168{
 169    if (s->write_poll != enable) {
 170        s->write_poll = enable;
 171        netmap_update_fd_handler(s);
 172    }
 173}
 174
 175static void netmap_poll(NetClientState *nc, bool enable)
 176{
 177    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
 178
 179    if (s->read_poll != enable || s->write_poll != enable) {
 180        s->write_poll = enable;
 181        s->read_poll  = enable;
 182        netmap_update_fd_handler(s);
 183    }
 184}
 185
/*
 * The fd_write() callback, invoked if the fd is marked as
 * writable after a poll. Unregister the handler and flush any
 * buffered packets.
 */
static void netmap_writable(void *opaque)
{
    NetmapState *s = opaque;

    /* The TX ring has room again: stop polling for writability and
     * let the net layer retry the packets it queued while the TX path
     * was returning 0. */
    netmap_write_poll(s, false);
    qemu_flush_queued_packets(&s->nc);
}
 198
 199static ssize_t netmap_receive(NetClientState *nc,
 200      const uint8_t *buf, size_t size)
 201{
 202    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
 203    struct netmap_ring *ring = s->me.tx;
 204    uint32_t i;
 205    uint32_t idx;
 206    uint8_t *dst;
 207
 208    if (unlikely(!ring)) {
 209        /* Drop. */
 210        return size;
 211    }
 212
 213    if (unlikely(size > ring->nr_buf_size)) {
 214        RD(5, "[netmap_receive] drop packet of size %d > %d\n",
 215                                    (int)size, ring->nr_buf_size);
 216        return size;
 217    }
 218
 219    if (nm_ring_empty(ring)) {
 220        /* No available slots in the netmap TX ring. */
 221        netmap_write_poll(s, true);
 222        return 0;
 223    }
 224
 225    i = ring->cur;
 226    idx = ring->slot[i].buf_idx;
 227    dst = (uint8_t *)NETMAP_BUF(ring, idx);
 228
 229    ring->slot[i].len = size;
 230    ring->slot[i].flags = 0;
 231    pkt_copy(buf, dst, size);
 232    ring->cur = ring->head = nm_ring_next(ring, i);
 233    ioctl(s->me.fd, NIOCTXSYNC, NULL);
 234
 235    return size;
 236}
 237
 238static ssize_t netmap_receive_iov(NetClientState *nc,
 239                    const struct iovec *iov, int iovcnt)
 240{
 241    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
 242    struct netmap_ring *ring = s->me.tx;
 243    uint32_t last;
 244    uint32_t idx;
 245    uint8_t *dst;
 246    int j;
 247    uint32_t i;
 248
 249    if (unlikely(!ring)) {
 250        /* Drop the packet. */
 251        return iov_size(iov, iovcnt);
 252    }
 253
 254    last = i = ring->cur;
 255
 256    if (nm_ring_space(ring) < iovcnt) {
 257        /* Not enough netmap slots. */
 258        netmap_write_poll(s, true);
 259        return 0;
 260    }
 261
 262    for (j = 0; j < iovcnt; j++) {
 263        int iov_frag_size = iov[j].iov_len;
 264        int offset = 0;
 265        int nm_frag_size;
 266
 267        /* Split each iovec fragment over more netmap slots, if
 268           necessary. */
 269        while (iov_frag_size) {
 270            nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size);
 271
 272            if (unlikely(nm_ring_empty(ring))) {
 273                /* We run out of netmap slots while splitting the
 274                   iovec fragments. */
 275                netmap_write_poll(s, true);
 276                return 0;
 277            }
 278
 279            idx = ring->slot[i].buf_idx;
 280            dst = (uint8_t *)NETMAP_BUF(ring, idx);
 281
 282            ring->slot[i].len = nm_frag_size;
 283            ring->slot[i].flags = NS_MOREFRAG;
 284            pkt_copy(iov[j].iov_base + offset, dst, nm_frag_size);
 285
 286            last = i;
 287            i = nm_ring_next(ring, i);
 288
 289            offset += nm_frag_size;
 290            iov_frag_size -= nm_frag_size;
 291        }
 292    }
 293    /* The last slot must not have NS_MOREFRAG set. */
 294    ring->slot[last].flags &= ~NS_MOREFRAG;
 295
 296    /* Now update ring->cur and ring->head. */
 297    ring->cur = ring->head = i;
 298
 299    ioctl(s->me.fd, NIOCTXSYNC, NULL);
 300
 301    return iov_size(iov, iovcnt);
 302}
 303
/* Complete a previous send (backend --> guest) and enable the
   fd_read callback.  Invoked by the net layer once the packet queued
   by netmap_send() has been delivered; 'len' is unused. */
static void netmap_send_completed(NetClientState *nc, ssize_t len)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);

    netmap_read_poll(s, true);
}
 312
 313static void netmap_send(void *opaque)
 314{
 315    NetmapState *s = opaque;
 316    struct netmap_ring *ring = s->me.rx;
 317
 318    /* Keep sending while there are available packets into the netmap
 319       RX ring and the forwarding path towards the peer is open. */
 320    while (!nm_ring_empty(ring) && qemu_can_send_packet(&s->nc)) {
 321        uint32_t i;
 322        uint32_t idx;
 323        bool morefrag;
 324        int iovcnt = 0;
 325        int iovsize;
 326
 327        do {
 328            i = ring->cur;
 329            idx = ring->slot[i].buf_idx;
 330            morefrag = (ring->slot[i].flags & NS_MOREFRAG);
 331            s->iov[iovcnt].iov_base = (u_char *)NETMAP_BUF(ring, idx);
 332            s->iov[iovcnt].iov_len = ring->slot[i].len;
 333            iovcnt++;
 334
 335            ring->cur = ring->head = nm_ring_next(ring, i);
 336        } while (!nm_ring_empty(ring) && morefrag);
 337
 338        if (unlikely(nm_ring_empty(ring) && morefrag)) {
 339            RD(5, "[netmap_send] ran out of slots, with a pending"
 340                   "incomplete packet\n");
 341        }
 342
 343        iovsize = qemu_sendv_packet_async(&s->nc, s->iov, iovcnt,
 344                                            netmap_send_completed);
 345
 346        if (iovsize == 0) {
 347            /* The peer does not receive anymore. Packet is queued, stop
 348             * reading from the backend until netmap_send_completed()
 349             */
 350            netmap_read_poll(s, false);
 351            break;
 352        }
 353    }
 354}
 355
 356/* Flush and close. */
 357static void netmap_cleanup(NetClientState *nc)
 358{
 359    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
 360
 361    qemu_purge_queued_packets(nc);
 362
 363    netmap_poll(nc, false);
 364    munmap(s->me.mem, s->me.memsize);
 365    close(s->me.fd);
 366
 367    s->me.fd = -1;
 368}
 369
/* Offloading manipulation support callbacks. */
static bool netmap_has_ufo(NetClientState *nc)
{
    /* Always advertise UFO support; the backend never segments
     * packets itself -- presumably the virtio-net header carried in
     * front of each packet (see netmap_set_vnet_hdr_len) lets the
     * receiving port handle it.  TODO confirm against VALE semantics. */
    return true;
}
 375
/* The backend can carry a virtio-net header in front of each packet;
 * the length is configured via netmap_set_vnet_hdr_len(). */
static bool netmap_has_vnet_hdr(NetClientState *nc)
{
    return true;
}
 380
 381static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len)
 382{
 383    return len == 0 || len == sizeof(struct virtio_net_hdr) ||
 384                len == sizeof(struct virtio_net_hdr_mrg_rxbuf);
 385}
 386
/* Intentionally empty: the header length is negotiated through
 * netmap_set_vnet_hdr_len(), so there is nothing to toggle here. */
static void netmap_using_vnet_hdr(NetClientState *nc, bool enable)
{
}
 390
 391static void netmap_set_vnet_hdr_len(NetClientState *nc, int len)
 392{
 393    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
 394    int err;
 395    struct nmreq req;
 396
 397    /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header
 398     * length for the netmap adapter associated to 'me->ifname'.
 399     */
 400    memset(&req, 0, sizeof(req));
 401    pstrcpy(req.nr_name, sizeof(req.nr_name), s->me.ifname);
 402    req.nr_version = NETMAP_API;
 403    req.nr_cmd = NETMAP_BDG_VNET_HDR;
 404    req.nr_arg1 = len;
 405    err = ioctl(s->me.fd, NIOCREGIF, &req);
 406    if (err) {
 407        error_report("Unable to execute NETMAP_BDG_VNET_HDR on %s: %s",
 408                     s->me.ifname, strerror(errno));
 409    } else {
 410        /* Keep track of the current length. */
 411        s->vnet_hdr_len = len;
 412    }
 413}
 414
 415static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
 416                               int ecn, int ufo)
 417{
 418    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
 419
 420    /* Setting a virtio-net header length greater than zero automatically
 421     * enables the offloadings.
 422     */
 423    if (!s->vnet_hdr_len) {
 424        netmap_set_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr));
 425    }
 426}
 427
/* NetClientInfo methods: the callback table registered for every
 * netmap backend instance by net_init_netmap(). */
static NetClientInfo net_netmap_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NETMAP,
    .size = sizeof(NetmapState),
    .receive = netmap_receive,            /* guest --> net, linear buffer */
    .receive_iov = netmap_receive_iov,    /* guest --> net, scatter/gather */
    .poll = netmap_poll,
    .cleanup = netmap_cleanup,
    /* Offload / virtio-net header negotiation callbacks. */
    .has_ufo = netmap_has_ufo,
    .has_vnet_hdr = netmap_has_vnet_hdr,
    .has_vnet_hdr_len = netmap_has_vnet_hdr_len,
    .using_vnet_hdr = netmap_using_vnet_hdr,
    .set_offload = netmap_set_offload,
    .set_vnet_hdr_len = netmap_set_vnet_hdr_len,
};
 443
 444/* The exported init function
 445 *
 446 * ... -net netmap,ifname="..."
 447 */
 448int net_init_netmap(const NetClientOptions *opts,
 449        const char *name, NetClientState *peer)
 450{
 451    const NetdevNetmapOptions *netmap_opts = opts->netmap;
 452    NetClientState *nc;
 453    NetmapPriv me;
 454    NetmapState *s;
 455
 456    pstrcpy(me.fdname, sizeof(me.fdname),
 457        netmap_opts->has_devname ? netmap_opts->devname : "/dev/netmap");
 458    /* Set default name for the port if not supplied. */
 459    pstrcpy(me.ifname, sizeof(me.ifname), netmap_opts->ifname);
 460    if (netmap_open(&me)) {
 461        return -1;
 462    }
 463    /* Create the object. */
 464    nc = qemu_new_net_client(&net_netmap_info, peer, "netmap", name);
 465    s = DO_UPCAST(NetmapState, nc, nc);
 466    s->me = me;
 467    s->vnet_hdr_len = 0;
 468    netmap_read_poll(s, true); /* Initially only poll for reads. */
 469
 470    return 0;
 471}
 472
 473