/* qemu/hw/xen_nic.c */
   1/*
   2 *  xen paravirt network card backend
   3 *
   4 *  (c) Gerd Hoffmann <kraxel@redhat.com>
   5 *
   6 *  This program is free software; you can redistribute it and/or modify
   7 *  it under the terms of the GNU General Public License as published by
   8 *  the Free Software Foundation; under version 2 of the License.
   9 *
  10 *  This program is distributed in the hope that it will be useful,
  11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 *  GNU General Public License for more details.
  14 *
  15 *  You should have received a copy of the GNU General Public License along
  16 *  with this program; if not, see <http://www.gnu.org/licenses/>.
  17 */
  18
  19#include <stdio.h>
  20#include <stdlib.h>
  21#include <stdarg.h>
  22#include <string.h>
  23#include <unistd.h>
  24#include <signal.h>
  25#include <inttypes.h>
  26#include <fcntl.h>
  27#include <errno.h>
  28#include <pthread.h>
  29#include <sys/socket.h>
  30#include <sys/ioctl.h>
  31#include <sys/types.h>
  32#include <sys/stat.h>
  33#include <sys/mman.h>
  34#include <sys/wait.h>
  35
  36#include <xs.h>
  37#include <xenctrl.h>
  38#include <xen/io/xenbus.h>
  39#include <xen/io/netif.h>
  40
  41#include "hw.h"
  42#include "net.h"
  43#include "net/checksum.h"
  44#include "net/util.h"
  45#include "qemu-char.h"
  46#include "xen_backend.h"
  47
  48/* ------------------------------------------------------------- */
  49
   50/* Per-device state for one Xen PV network backend instance. */
struct XenNetDev {
    struct XenDevice      xendev;  /* must be first */
    char                  *mac;            /* MAC string read from xenstore (owned, freed in net_free) */
    int                   tx_work;         /* set by net_tx_response() when more tx requests are pending */
    int                   tx_ring_ref;     /* grant ref of the tx shared ring (from frontend) */
    int                   rx_ring_ref;     /* grant ref of the rx shared ring (from frontend) */
    struct netif_tx_sring *txs;            /* mapped tx shared ring page */
    struct netif_rx_sring *rxs;            /* mapped rx shared ring page */
    netif_tx_back_ring_t  tx_ring;         /* backend-side view of the tx ring */
    netif_rx_back_ring_t  rx_ring;         /* backend-side view of the rx ring */
    NICConf               conf;
    NICState              *nic;
};
  63
  64/* ------------------------------------------------------------- */
  65
  66static void net_tx_response(struct XenNetDev *netdev, netif_tx_request_t *txp, int8_t st)
  67{
  68    RING_IDX i = netdev->tx_ring.rsp_prod_pvt;
  69    netif_tx_response_t *resp;
  70    int notify;
  71
  72    resp = RING_GET_RESPONSE(&netdev->tx_ring, i);
  73    resp->id     = txp->id;
  74    resp->status = st;
  75
  76#if 0
  77    if (txp->flags & NETTXF_extra_info) {
  78        RING_GET_RESPONSE(&netdev->tx_ring, ++i)->status = NETIF_RSP_NULL;
  79    }
  80#endif
  81
  82    netdev->tx_ring.rsp_prod_pvt = ++i;
  83    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->tx_ring, notify);
  84    if (notify) {
  85        xen_be_send_notify(&netdev->xendev);
  86    }
  87
  88    if (i == netdev->tx_ring.req_cons) {
  89        int more_to_do;
  90        RING_FINAL_CHECK_FOR_REQUESTS(&netdev->tx_ring, more_to_do);
  91        if (more_to_do) {
  92            netdev->tx_work++;
  93        }
  94    }
  95}
  96
  97static void net_tx_error(struct XenNetDev *netdev, netif_tx_request_t *txp, RING_IDX end)
  98{
  99#if 0
 100    /*
 101     * Hmm, why netback fails everything in the ring?
 102     * Should we do that even when not supporting SG and TSO?
 103     */
 104    RING_IDX cons = netdev->tx_ring.req_cons;
 105
 106    do {
 107        make_tx_response(netif, txp, NETIF_RSP_ERROR);
 108        if (cons >= end) {
 109            break;
 110        }
 111        txp = RING_GET_REQUEST(&netdev->tx_ring, cons++);
 112    } while (1);
 113    netdev->tx_ring.req_cons = cons;
 114    netif_schedule_work(netif);
 115    netif_put(netif);
 116#else
 117    net_tx_response(netdev, txp, NETIF_RSP_ERROR);
 118#endif
 119}
 120
  121/*
 * Drain the guest's tx ring: for each request, map the granted page
 * read-only, hand the frame to the qemu net layer, then complete the
 * request.  The outer loop repeats while net_tx_response() reported
 * more pending work (tx_work).
 *
 * Only single-slot requests are handled; requests carrying the
 * extra_info/more_data flags are failed back to the frontend since we
 * never advertise SG/GSO in xenstore.
 */
static void net_tx_packets(struct XenNetDev *netdev)
{
    netif_tx_request_t txreq;
    RING_IDX rc, rp;
    void *page;
    void *tmpbuf = NULL;  /* lazily allocated bounce buffer for checksum fixup */

    for (;;) {
        rc = netdev->tx_ring.req_cons;
        rp = netdev->tx_ring.sring->req_prod;
        xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

        while ((rc != rp)) {
            if (RING_REQUEST_CONS_OVERFLOW(&netdev->tx_ring, rc)) {
                break;
            }
            /* copy the request out of the shared page before validating it */
            memcpy(&txreq, RING_GET_REQUEST(&netdev->tx_ring, rc), sizeof(txreq));
            netdev->tx_ring.req_cons = ++rc;

#if 1
            /* should not happen in theory, we don't announce the *
             * feature-{sg,gso,whatelse} flags in xenstore (yet?) */
            if (txreq.flags & NETTXF_extra_info) {
                xen_be_printf(&netdev->xendev, 0, "FIXME: extra info flag\n");
                net_tx_error(netdev, &txreq, rc);
                continue;
            }
            if (txreq.flags & NETTXF_more_data) {
                xen_be_printf(&netdev->xendev, 0, "FIXME: more data flag\n");
                net_tx_error(netdev, &txreq, rc);
                continue;
            }
#endif

            /* 14 == sizeof a bare ethernet header; anything smaller is bogus */
            if (txreq.size < 14) {
                xen_be_printf(&netdev->xendev, 0, "bad packet size: %d\n", txreq.size);
                net_tx_error(netdev, &txreq, rc);
                continue;
            }

            /* a request must fit within its single granted page */
            if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) {
                xen_be_printf(&netdev->xendev, 0, "error: page crossing\n");
                net_tx_error(netdev, &txreq, rc);
                continue;
            }

            xen_be_printf(&netdev->xendev, 3, "tx packet ref %d, off %d, len %d, flags 0x%x%s%s%s%s\n",
                          txreq.gref, txreq.offset, txreq.size, txreq.flags,
                          (txreq.flags & NETTXF_csum_blank)     ? " csum_blank"     : "",
                          (txreq.flags & NETTXF_data_validated) ? " data_validated" : "",
                          (txreq.flags & NETTXF_more_data)      ? " more_data"      : "",
                          (txreq.flags & NETTXF_extra_info)     ? " extra_info"     : "");

            page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
                                           netdev->xendev.dom,
                                           txreq.gref, PROT_READ);
            if (page == NULL) {
                xen_be_printf(&netdev->xendev, 0, "error: tx gref dereference failed (%d)\n",
                              txreq.gref);
                net_tx_error(netdev, &txreq, rc);
                continue;
            }
            if (txreq.flags & NETTXF_csum_blank) {
                /* have read-only mapping -> can't fill checksum in-place */
                if (!tmpbuf) {
                    tmpbuf = qemu_malloc(XC_PAGE_SIZE);
                }
                memcpy(tmpbuf, page + txreq.offset, txreq.size);
                net_checksum_calculate(tmpbuf, txreq.size);
                qemu_send_packet(&netdev->nic->nc, tmpbuf, txreq.size);
            } else {
                qemu_send_packet(&netdev->nic->nc, page + txreq.offset, txreq.size);
            }
            xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1);
            net_tx_response(netdev, &txreq, NETIF_RSP_OKAY);
        }
        /* another pass if net_tx_response() saw more queued requests */
        if (!netdev->tx_work) {
            break;
        }
        netdev->tx_work = 0;
    }
    qemu_free(tmpbuf);  /* free(NULL) is fine if no csum fixup happened */
}
 204
 205/* ------------------------------------------------------------- */
 206
  207/*
 * Queue one rx response on the shared ring and notify the frontend if
 * needed.  'size' becomes the (positive) status on success; a negative
 * 'st' (e.g. NETIF_RSP_ERROR) overrides it as the error status, per
 * the netif protocol where status doubles as length-or-error.
 */
static void net_rx_response(struct XenNetDev *netdev,
                            netif_rx_request_t *req, int8_t st,
                            uint16_t offset, uint16_t size,
                            uint16_t flags)
{
    RING_IDX i = netdev->rx_ring.rsp_prod_pvt;
    netif_rx_response_t *resp;
    int notify;

    resp = RING_GET_RESPONSE(&netdev->rx_ring, i);
    resp->offset     = offset;
    resp->flags      = flags;
    resp->id         = req->id;
    resp->status     = (int16_t)size;
    if (st < 0) {
        resp->status = (int16_t)st;  /* error code wins over length */
    }

    xen_be_printf(&netdev->xendev, 3, "rx response: idx %d, status %d, flags 0x%x\n",
                  i, resp->status, resp->flags);

    netdev->rx_ring.rsp_prod_pvt = ++i;
    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->rx_ring, notify);
    if (notify) {
        xen_be_send_notify(&netdev->xendev);
    }
}
 234
 235#define NET_IP_ALIGN 2
 236
 237static int net_rx_ok(VLANClientState *nc)
 238{
 239    struct XenNetDev *netdev = DO_UPCAST(NICState, nc, nc)->opaque;
 240    RING_IDX rc, rp;
 241
 242    if (netdev->xendev.be_state != XenbusStateConnected) {
 243        return 0;
 244    }
 245
 246    rc = netdev->rx_ring.req_cons;
 247    rp = netdev->rx_ring.sring->req_prod;
 248    xen_rmb();
 249
 250    if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
 251        xen_be_printf(&netdev->xendev, 2, "%s: no rx buffers (%d/%d)\n",
 252                      __FUNCTION__, rc, rp);
 253        return 0;
 254    }
 255    return 1;
 256}
 257
  258/*
 * receive callback: copy one incoming frame into the next guest rx
 * buffer.  Maps the granted page writable, copies at NET_IP_ALIGN
 * offset (keeps the IP header 4-byte aligned in the guest), then
 * completes the rx request.  Returns the consumed size, or -1 when
 * the packet must be dropped (not connected, no buffer, oversized,
 * or grant mapping failed).
 */
static ssize_t net_rx_packet(VLANClientState *nc, const uint8_t *buf, size_t size)
{
    struct XenNetDev *netdev = DO_UPCAST(NICState, nc, nc)->opaque;
    netif_rx_request_t rxreq;
    RING_IDX rc, rp;
    void *page;

    if (netdev->xendev.be_state != XenbusStateConnected) {
        return -1;
    }

    rc = netdev->rx_ring.req_cons;
    rp = netdev->rx_ring.sring->req_prod;
    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

    if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
        xen_be_printf(&netdev->xendev, 2, "no buffer, drop packet\n");
        return -1;
    }
    if (size > XC_PAGE_SIZE - NET_IP_ALIGN) {
        /* NOTE(review): "%ld" assumes XC_PAGE_SIZE - NET_IP_ALIGN is long;
         * verify against the xenctrl definition of XC_PAGE_SIZE */
        xen_be_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)",
                      (unsigned long)size, XC_PAGE_SIZE - NET_IP_ALIGN);
        return -1;
    }

    /* copy the request descriptor out of the shared ring */
    memcpy(&rxreq, RING_GET_REQUEST(&netdev->rx_ring, rc), sizeof(rxreq));
    netdev->rx_ring.req_cons = ++rc;

    page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
                                   netdev->xendev.dom,
                                   rxreq.gref, PROT_WRITE);
    if (page == NULL) {
        xen_be_printf(&netdev->xendev, 0, "error: rx gref dereference failed (%d)\n",
                      rxreq.gref);
        net_rx_response(netdev, &rxreq, NETIF_RSP_ERROR, 0, 0, 0);
        return -1;
    }
    memcpy(page + NET_IP_ALIGN, buf, size);
    xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1);
    net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0);

    return size;
}
 301
 302/* ------------------------------------------------------------- */
 303
  304/* qemu net layer hooks for the xen NIC backend */
static NetClientInfo net_xen_info = {
    .type = NET_CLIENT_TYPE_NIC,
    .size = sizeof(NICState),
    .can_receive = net_rx_ok,     /* frontend has an rx buffer ready? */
    .receive = net_rx_packet,     /* copy frame into guest rx buffer */
};
 310
 311static int net_init(struct XenDevice *xendev)
 312{
 313    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
 314
 315    /* read xenstore entries */
 316    if (netdev->mac == NULL) {
 317        netdev->mac = xenstore_read_be_str(&netdev->xendev, "mac");
 318    }
 319
 320    /* do we have all we need? */
 321    if (netdev->mac == NULL) {
 322        return -1;
 323    }
 324
 325    if (net_parse_macaddr(netdev->conf.macaddr.a, netdev->mac) < 0) {
 326        return -1;
 327    }
 328
 329    netdev->conf.vlan = qemu_find_vlan(netdev->xendev.dev, 1);
 330    netdev->conf.peer = NULL;
 331
 332    netdev->nic = qemu_new_nic(&net_xen_info, &netdev->conf,
 333                               "xen", NULL, netdev);
 334
 335    snprintf(netdev->nic->nc.info_str, sizeof(netdev->nic->nc.info_str),
 336             "nic: xenbus vif macaddr=%s", netdev->mac);
 337
 338    /* fill info */
 339    xenstore_write_be_int(&netdev->xendev, "feature-rx-copy", 1);
 340    xenstore_write_be_int(&netdev->xendev, "feature-rx-flip", 0);
 341
 342    return 0;
 343}
 344
 345static int net_connect(struct XenDevice *xendev)
 346{
 347    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
 348    int rx_copy;
 349
 350    if (xenstore_read_fe_int(&netdev->xendev, "tx-ring-ref",
 351                             &netdev->tx_ring_ref) == -1) {
 352        return -1;
 353    }
 354    if (xenstore_read_fe_int(&netdev->xendev, "rx-ring-ref",
 355                             &netdev->rx_ring_ref) == -1) {
 356        return 1;
 357    }
 358    if (xenstore_read_fe_int(&netdev->xendev, "event-channel",
 359                             &netdev->xendev.remote_port) == -1) {
 360        return -1;
 361    }
 362
 363    if (xenstore_read_fe_int(&netdev->xendev, "request-rx-copy", &rx_copy) == -1) {
 364        rx_copy = 0;
 365    }
 366    if (rx_copy == 0) {
 367        xen_be_printf(&netdev->xendev, 0, "frontend doesn't support rx-copy.\n");
 368        return -1;
 369    }
 370
 371    netdev->txs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
 372                                          netdev->xendev.dom,
 373                                          netdev->tx_ring_ref,
 374                                          PROT_READ | PROT_WRITE);
 375    netdev->rxs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
 376                                          netdev->xendev.dom,
 377                                          netdev->rx_ring_ref,
 378                                          PROT_READ | PROT_WRITE);
 379    if (!netdev->txs || !netdev->rxs) {
 380        return -1;
 381    }
 382    BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE);
 383    BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XC_PAGE_SIZE);
 384
 385    xen_be_bind_evtchn(&netdev->xendev);
 386
 387    xen_be_printf(&netdev->xendev, 1, "ok: tx-ring-ref %d, rx-ring-ref %d, "
 388                  "remote port %d, local port %d\n",
 389                  netdev->tx_ring_ref, netdev->rx_ring_ref,
 390                  netdev->xendev.remote_port, netdev->xendev.local_port);
 391
 392    net_tx_packets(netdev);
 393    return 0;
 394}
 395
 396static void net_disconnect(struct XenDevice *xendev)
 397{
 398    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
 399
 400    xen_be_unbind_evtchn(&netdev->xendev);
 401
 402    if (netdev->txs) {
 403        xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->txs, 1);
 404        netdev->txs = NULL;
 405    }
 406    if (netdev->rxs) {
 407        xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->rxs, 1);
 408        netdev->rxs = NULL;
 409    }
 410    if (netdev->nic) {
 411        qemu_del_vlan_client(&netdev->nic->nc);
 412        netdev->nic = NULL;
 413    }
 414}
 415
 416static void net_event(struct XenDevice *xendev)
 417{
 418    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
 419    net_tx_packets(netdev);
 420}
 421
 422static int net_free(struct XenDevice *xendev)
 423{
 424    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
 425
 426    qemu_free(netdev->mac);
 427    return 0;
 428}
 429
 430/* ------------------------------------------------------------- */
 431
  432/* Backend ops table registered with the generic xen backend core. */
struct XenDevOps xen_netdev_ops = {
    .size       = sizeof(struct XenNetDev),
    .flags      = DEVOPS_FLAG_NEED_GNTDEV,   /* we map grant refs, need gnttabdev */
    .init       = net_init,
    .connect    = net_connect,
    .event      = net_event,
    .disconnect = net_disconnect,
    .free       = net_free,
};
 441