qemu/hw/net/xen_nic.c
<<
>>
Prefs
   1/*
   2 *  xen paravirt network card backend
   3 *
   4 *  (c) Gerd Hoffmann <kraxel@redhat.com>
   5 *
   6 *  This program is free software; you can redistribute it and/or modify
   7 *  it under the terms of the GNU General Public License as published by
   8 *  the Free Software Foundation; under version 2 of the License.
   9 *
  10 *  This program is distributed in the hope that it will be useful,
  11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 *  GNU General Public License for more details.
  14 *
  15 *  You should have received a copy of the GNU General Public License along
  16 *  with this program; if not, see <http://www.gnu.org/licenses/>.
  17 *
  18 *  Contributions after 2012-01-13 are licensed under the terms of the
  19 *  GNU GPL, version 2 or (at your option) any later version.
  20 */
  21
  22#include <stdio.h>
  23#include <stdlib.h>
  24#include <stdarg.h>
  25#include <string.h>
  26#include <unistd.h>
  27#include <signal.h>
  28#include <inttypes.h>
  29#include <fcntl.h>
  30#include <errno.h>
  31#include <sys/socket.h>
  32#include <sys/ioctl.h>
  33#include <sys/types.h>
  34#include <sys/stat.h>
  35#include <sys/mman.h>
  36#include <sys/wait.h>
  37
  38#include "hw/hw.h"
  39#include "net/net.h"
  40#include "net/checksum.h"
  41#include "net/util.h"
  42#include "hw/xen/xen_backend.h"
  43
  44#include <xen/io/netif.h>
  45
  46/* ------------------------------------------------------------- */
  47
  48struct XenNetDev {
  49    struct XenDevice      xendev;  /* must be first */
  50    char                  *mac;
  51    int                   tx_work;
  52    int                   tx_ring_ref;
  53    int                   rx_ring_ref;
  54    struct netif_tx_sring *txs;
  55    struct netif_rx_sring *rxs;
  56    netif_tx_back_ring_t  tx_ring;
  57    netif_rx_back_ring_t  rx_ring;
  58    NICConf               conf;
  59    NICState              *nic;
  60};
  61
  62/* ------------------------------------------------------------- */
  63
  64static void net_tx_response(struct XenNetDev *netdev, netif_tx_request_t *txp, int8_t st)
  65{
  66    RING_IDX i = netdev->tx_ring.rsp_prod_pvt;
  67    netif_tx_response_t *resp;
  68    int notify;
  69
  70    resp = RING_GET_RESPONSE(&netdev->tx_ring, i);
  71    resp->id     = txp->id;
  72    resp->status = st;
  73
  74#if 0
  75    if (txp->flags & NETTXF_extra_info) {
  76        RING_GET_RESPONSE(&netdev->tx_ring, ++i)->status = NETIF_RSP_NULL;
  77    }
  78#endif
  79
  80    netdev->tx_ring.rsp_prod_pvt = ++i;
  81    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->tx_ring, notify);
  82    if (notify) {
  83        xen_be_send_notify(&netdev->xendev);
  84    }
  85
  86    if (i == netdev->tx_ring.req_cons) {
  87        int more_to_do;
  88        RING_FINAL_CHECK_FOR_REQUESTS(&netdev->tx_ring, more_to_do);
  89        if (more_to_do) {
  90            netdev->tx_work++;
  91        }
  92    }
  93}
  94
  95static void net_tx_error(struct XenNetDev *netdev, netif_tx_request_t *txp, RING_IDX end)
  96{
  97#if 0
  98    /*
  99     * Hmm, why netback fails everything in the ring?
 100     * Should we do that even when not supporting SG and TSO?
 101     */
 102    RING_IDX cons = netdev->tx_ring.req_cons;
 103
 104    do {
 105        make_tx_response(netif, txp, NETIF_RSP_ERROR);
 106        if (cons >= end) {
 107            break;
 108        }
 109        txp = RING_GET_REQUEST(&netdev->tx_ring, cons++);
 110    } while (1);
 111    netdev->tx_ring.req_cons = cons;
 112    netif_schedule_work(netif);
 113    netif_put(netif);
 114#else
 115    net_tx_response(netdev, txp, NETIF_RSP_ERROR);
 116#endif
 117}
 118
 119static void net_tx_packets(struct XenNetDev *netdev)
 120{
 121    netif_tx_request_t txreq;
 122    RING_IDX rc, rp;
 123    void *page;
 124    void *tmpbuf = NULL;
 125
 126    for (;;) {
 127        rc = netdev->tx_ring.req_cons;
 128        rp = netdev->tx_ring.sring->req_prod;
 129        xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
 130
 131        while ((rc != rp)) {
 132            if (RING_REQUEST_CONS_OVERFLOW(&netdev->tx_ring, rc)) {
 133                break;
 134            }
 135            memcpy(&txreq, RING_GET_REQUEST(&netdev->tx_ring, rc), sizeof(txreq));
 136            netdev->tx_ring.req_cons = ++rc;
 137
 138#if 1
 139            /* should not happen in theory, we don't announce the *
 140             * feature-{sg,gso,whatelse} flags in xenstore (yet?) */
 141            if (txreq.flags & NETTXF_extra_info) {
 142                xen_be_printf(&netdev->xendev, 0, "FIXME: extra info flag\n");
 143                net_tx_error(netdev, &txreq, rc);
 144                continue;
 145            }
 146            if (txreq.flags & NETTXF_more_data) {
 147                xen_be_printf(&netdev->xendev, 0, "FIXME: more data flag\n");
 148                net_tx_error(netdev, &txreq, rc);
 149                continue;
 150            }
 151#endif
 152
 153            if (txreq.size < 14) {
 154                xen_be_printf(&netdev->xendev, 0, "bad packet size: %d\n", txreq.size);
 155                net_tx_error(netdev, &txreq, rc);
 156                continue;
 157            }
 158
 159            if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) {
 160                xen_be_printf(&netdev->xendev, 0, "error: page crossing\n");
 161                net_tx_error(netdev, &txreq, rc);
 162                continue;
 163            }
 164
 165            xen_be_printf(&netdev->xendev, 3, "tx packet ref %d, off %d, len %d, flags 0x%x%s%s%s%s\n",
 166                          txreq.gref, txreq.offset, txreq.size, txreq.flags,
 167                          (txreq.flags & NETTXF_csum_blank)     ? " csum_blank"     : "",
 168                          (txreq.flags & NETTXF_data_validated) ? " data_validated" : "",
 169                          (txreq.flags & NETTXF_more_data)      ? " more_data"      : "",
 170                          (txreq.flags & NETTXF_extra_info)     ? " extra_info"     : "");
 171
 172            page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
 173                                           netdev->xendev.dom,
 174                                           txreq.gref, PROT_READ);
 175            if (page == NULL) {
 176                xen_be_printf(&netdev->xendev, 0, "error: tx gref dereference failed (%d)\n",
 177                              txreq.gref);
 178                net_tx_error(netdev, &txreq, rc);
 179                continue;
 180            }
 181            if (txreq.flags & NETTXF_csum_blank) {
 182                /* have read-only mapping -> can't fill checksum in-place */
 183                if (!tmpbuf) {
 184                    tmpbuf = g_malloc(XC_PAGE_SIZE);
 185                }
 186                memcpy(tmpbuf, page + txreq.offset, txreq.size);
 187                net_checksum_calculate(tmpbuf, txreq.size);
 188                qemu_send_packet(qemu_get_queue(netdev->nic), tmpbuf,
 189                                 txreq.size);
 190            } else {
 191                qemu_send_packet(qemu_get_queue(netdev->nic),
 192                                 page + txreq.offset, txreq.size);
 193            }
 194            xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1);
 195            net_tx_response(netdev, &txreq, NETIF_RSP_OKAY);
 196        }
 197        if (!netdev->tx_work) {
 198            break;
 199        }
 200        netdev->tx_work = 0;
 201    }
 202    g_free(tmpbuf);
 203}
 204
 205/* ------------------------------------------------------------- */
 206
 207static void net_rx_response(struct XenNetDev *netdev,
 208                            netif_rx_request_t *req, int8_t st,
 209                            uint16_t offset, uint16_t size,
 210                            uint16_t flags)
 211{
 212    RING_IDX i = netdev->rx_ring.rsp_prod_pvt;
 213    netif_rx_response_t *resp;
 214    int notify;
 215
 216    resp = RING_GET_RESPONSE(&netdev->rx_ring, i);
 217    resp->offset     = offset;
 218    resp->flags      = flags;
 219    resp->id         = req->id;
 220    resp->status     = (int16_t)size;
 221    if (st < 0) {
 222        resp->status = (int16_t)st;
 223    }
 224
 225    xen_be_printf(&netdev->xendev, 3, "rx response: idx %d, status %d, flags 0x%x\n",
 226                  i, resp->status, resp->flags);
 227
 228    netdev->rx_ring.rsp_prod_pvt = ++i;
 229    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->rx_ring, notify);
 230    if (notify) {
 231        xen_be_send_notify(&netdev->xendev);
 232    }
 233}
 234
 235#define NET_IP_ALIGN 2
 236
 237static int net_rx_ok(NetClientState *nc)
 238{
 239    struct XenNetDev *netdev = qemu_get_nic_opaque(nc);
 240    RING_IDX rc, rp;
 241
 242    if (netdev->xendev.be_state != XenbusStateConnected) {
 243        return 0;
 244    }
 245
 246    rc = netdev->rx_ring.req_cons;
 247    rp = netdev->rx_ring.sring->req_prod;
 248    xen_rmb();
 249
 250    if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
 251        xen_be_printf(&netdev->xendev, 2, "%s: no rx buffers (%d/%d)\n",
 252                      __FUNCTION__, rc, rp);
 253        return 0;
 254    }
 255    return 1;
 256}
 257
 258static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size)
 259{
 260    struct XenNetDev *netdev = qemu_get_nic_opaque(nc);
 261    netif_rx_request_t rxreq;
 262    RING_IDX rc, rp;
 263    void *page;
 264
 265    if (netdev->xendev.be_state != XenbusStateConnected) {
 266        return -1;
 267    }
 268
 269    rc = netdev->rx_ring.req_cons;
 270    rp = netdev->rx_ring.sring->req_prod;
 271    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
 272
 273    if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
 274        xen_be_printf(&netdev->xendev, 2, "no buffer, drop packet\n");
 275        return -1;
 276    }
 277    if (size > XC_PAGE_SIZE - NET_IP_ALIGN) {
 278        xen_be_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)",
 279                      (unsigned long)size, XC_PAGE_SIZE - NET_IP_ALIGN);
 280        return -1;
 281    }
 282
 283    memcpy(&rxreq, RING_GET_REQUEST(&netdev->rx_ring, rc), sizeof(rxreq));
 284    netdev->rx_ring.req_cons = ++rc;
 285
 286    page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
 287                                   netdev->xendev.dom,
 288                                   rxreq.gref, PROT_WRITE);
 289    if (page == NULL) {
 290        xen_be_printf(&netdev->xendev, 0, "error: rx gref dereference failed (%d)\n",
 291                      rxreq.gref);
 292        net_rx_response(netdev, &rxreq, NETIF_RSP_ERROR, 0, 0, 0);
 293        return -1;
 294    }
 295    memcpy(page + NET_IP_ALIGN, buf, size);
 296    xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1);
 297    net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0);
 298
 299    return size;
 300}
 301
 302/* ------------------------------------------------------------- */
 303
 304static NetClientInfo net_xen_info = {
 305    .type = NET_CLIENT_OPTIONS_KIND_NIC,
 306    .size = sizeof(NICState),
 307    .can_receive = net_rx_ok,
 308    .receive = net_rx_packet,
 309};
 310
 311static int net_init(struct XenDevice *xendev)
 312{
 313    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
 314
 315    /* read xenstore entries */
 316    if (netdev->mac == NULL) {
 317        netdev->mac = xenstore_read_be_str(&netdev->xendev, "mac");
 318    }
 319
 320    /* do we have all we need? */
 321    if (netdev->mac == NULL) {
 322        return -1;
 323    }
 324
 325    if (net_parse_macaddr(netdev->conf.macaddr.a, netdev->mac) < 0) {
 326        return -1;
 327    }
 328
 329    netdev->nic = qemu_new_nic(&net_xen_info, &netdev->conf,
 330                               "xen", NULL, netdev);
 331
 332    snprintf(qemu_get_queue(netdev->nic)->info_str,
 333             sizeof(qemu_get_queue(netdev->nic)->info_str),
 334             "nic: xenbus vif macaddr=%s", netdev->mac);
 335
 336    /* fill info */
 337    xenstore_write_be_int(&netdev->xendev, "feature-rx-copy", 1);
 338    xenstore_write_be_int(&netdev->xendev, "feature-rx-flip", 0);
 339
 340    return 0;
 341}
 342
 343static int net_connect(struct XenDevice *xendev)
 344{
 345    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
 346    int rx_copy;
 347
 348    if (xenstore_read_fe_int(&netdev->xendev, "tx-ring-ref",
 349                             &netdev->tx_ring_ref) == -1) {
 350        return -1;
 351    }
 352    if (xenstore_read_fe_int(&netdev->xendev, "rx-ring-ref",
 353                             &netdev->rx_ring_ref) == -1) {
 354        return 1;
 355    }
 356    if (xenstore_read_fe_int(&netdev->xendev, "event-channel",
 357                             &netdev->xendev.remote_port) == -1) {
 358        return -1;
 359    }
 360
 361    if (xenstore_read_fe_int(&netdev->xendev, "request-rx-copy", &rx_copy) == -1) {
 362        rx_copy = 0;
 363    }
 364    if (rx_copy == 0) {
 365        xen_be_printf(&netdev->xendev, 0, "frontend doesn't support rx-copy.\n");
 366        return -1;
 367    }
 368
 369    netdev->txs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
 370                                          netdev->xendev.dom,
 371                                          netdev->tx_ring_ref,
 372                                          PROT_READ | PROT_WRITE);
 373    netdev->rxs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
 374                                          netdev->xendev.dom,
 375                                          netdev->rx_ring_ref,
 376                                          PROT_READ | PROT_WRITE);
 377    if (!netdev->txs || !netdev->rxs) {
 378        return -1;
 379    }
 380    BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE);
 381    BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XC_PAGE_SIZE);
 382
 383    xen_be_bind_evtchn(&netdev->xendev);
 384
 385    xen_be_printf(&netdev->xendev, 1, "ok: tx-ring-ref %d, rx-ring-ref %d, "
 386                  "remote port %d, local port %d\n",
 387                  netdev->tx_ring_ref, netdev->rx_ring_ref,
 388                  netdev->xendev.remote_port, netdev->xendev.local_port);
 389
 390    net_tx_packets(netdev);
 391    return 0;
 392}
 393
 394static void net_disconnect(struct XenDevice *xendev)
 395{
 396    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
 397
 398    xen_be_unbind_evtchn(&netdev->xendev);
 399
 400    if (netdev->txs) {
 401        xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->txs, 1);
 402        netdev->txs = NULL;
 403    }
 404    if (netdev->rxs) {
 405        xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->rxs, 1);
 406        netdev->rxs = NULL;
 407    }
 408    if (netdev->nic) {
 409        qemu_del_nic(netdev->nic);
 410        netdev->nic = NULL;
 411    }
 412}
 413
 414static void net_event(struct XenDevice *xendev)
 415{
 416    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
 417    net_tx_packets(netdev);
 418    qemu_flush_queued_packets(qemu_get_queue(netdev->nic));
 419}
 420
 421static int net_free(struct XenDevice *xendev)
 422{
 423    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
 424
 425    g_free(netdev->mac);
 426    return 0;
 427}
 428
 429/* ------------------------------------------------------------- */
 430
 431struct XenDevOps xen_netdev_ops = {
 432    .size       = sizeof(struct XenNetDev),
 433    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
 434    .init       = net_init,
 435    .initialise    = net_connect,
 436    .event      = net_event,
 437    .disconnect = net_disconnect,
 438    .free       = net_free,
 439};
 440