qemu/hw/net/virtio-net.c
<<
>>
Prefs
   1/*
   2 * Virtio Network Device
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qemu/iov.h"
  16#include "hw/virtio/virtio.h"
  17#include "net/net.h"
  18#include "net/checksum.h"
  19#include "net/tap.h"
  20#include "qemu/error-report.h"
  21#include "qemu/timer.h"
  22#include "hw/virtio/virtio-net.h"
  23#include "net/vhost_net.h"
  24#include "hw/virtio/virtio-bus.h"
  25#include "qapi/qmp/qjson.h"
  26#include "qapi-event.h"
  27#include "hw/virtio/virtio-access.h"
  28#include "migration/misc.h"
  29
  30#define VIRTIO_NET_VM_VERSION    11
  31
  32#define MAC_TABLE_ENTRIES    64
  33#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
  34
  35/* previously fixed value */
  36#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
  37#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
  38
  39/* for now, only allow larger queues; with virtio-1, guest can downsize */
  40#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
  41#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
  42
  43/*
  44 * Calculate the number of bytes up to and including the given 'field' of
  45 * 'container'.
  46 */
  47#define endof(container, field) \
  48    (offsetof(container, field) + sizeof(((container *)0)->field))
  49
  50typedef struct VirtIOFeature {
  51    uint32_t flags;
  52    size_t end;
  53} VirtIOFeature;
  54
  55static VirtIOFeature feature_sizes[] = {
  56    {.flags = 1 << VIRTIO_NET_F_MAC,
  57     .end = endof(struct virtio_net_config, mac)},
  58    {.flags = 1 << VIRTIO_NET_F_STATUS,
  59     .end = endof(struct virtio_net_config, status)},
  60    {.flags = 1 << VIRTIO_NET_F_MQ,
  61     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
  62    {.flags = 1 << VIRTIO_NET_F_MTU,
  63     .end = endof(struct virtio_net_config, mtu)},
  64    {}
  65};
  66
  67static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
  68{
  69    VirtIONet *n = qemu_get_nic_opaque(nc);
  70
  71    return &n->vqs[nc->queue_index];
  72}
  73
  74static int vq2q(int queue_index)
  75{
  76    return queue_index / 2;
  77}
  78
  79/* TODO
  80 * - we could suppress RX interrupt if we were so inclined.
  81 */
  82
  83static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
  84{
  85    VirtIONet *n = VIRTIO_NET(vdev);
  86    struct virtio_net_config netcfg;
  87
  88    virtio_stw_p(vdev, &netcfg.status, n->status);
  89    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
  90    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
  91    memcpy(netcfg.mac, n->mac, ETH_ALEN);
  92    memcpy(config, &netcfg, n->config_size);
  93}
  94
  95static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
  96{
  97    VirtIONet *n = VIRTIO_NET(vdev);
  98    struct virtio_net_config netcfg = {};
  99
 100    memcpy(&netcfg, config, n->config_size);
 101
 102    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
 103        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
 104        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
 105        memcpy(n->mac, netcfg.mac, ETH_ALEN);
 106        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 107    }
 108}
 109
 110static bool virtio_net_started(VirtIONet *n, uint8_t status)
 111{
 112    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 113    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 114        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
 115}
 116
 117static void virtio_net_announce_timer(void *opaque)
 118{
 119    VirtIONet *n = opaque;
 120    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 121
 122    n->announce_counter--;
 123    n->status |= VIRTIO_NET_S_ANNOUNCE;
 124    virtio_notify_config(vdev);
 125}
 126
 127static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
 128{
 129    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 130    NetClientState *nc = qemu_get_queue(n->nic);
 131    int queues = n->multiqueue ? n->max_queues : 1;
 132
 133    if (!get_vhost_net(nc->peer)) {
 134        return;
 135    }
 136
 137    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
 138        !!n->vhost_started) {
 139        return;
 140    }
 141    if (!n->vhost_started) {
 142        int r, i;
 143
 144        if (n->needs_vnet_hdr_swap) {
 145            error_report("backend does not support %s vnet headers; "
 146                         "falling back on userspace virtio",
 147                         virtio_is_big_endian(vdev) ? "BE" : "LE");
 148            return;
 149        }
 150
 151        /* Any packets outstanding? Purge them to avoid touching rings
 152         * when vhost is running.
 153         */
 154        for (i = 0;  i < queues; i++) {
 155            NetClientState *qnc = qemu_get_subqueue(n->nic, i);
 156
 157            /* Purge both directions: TX and RX. */
 158            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
 159            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
 160        }
 161
 162        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
 163            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
 164            if (r < 0) {
 165                error_report("%uBytes MTU not supported by the backend",
 166                             n->net_conf.mtu);
 167
 168                return;
 169            }
 170        }
 171
 172        n->vhost_started = 1;
 173        r = vhost_net_start(vdev, n->nic->ncs, queues);
 174        if (r < 0) {
 175            error_report("unable to start vhost net: %d: "
 176                         "falling back on userspace virtio", -r);
 177            n->vhost_started = 0;
 178        }
 179    } else {
 180        vhost_net_stop(vdev, n->nic->ncs, queues);
 181        n->vhost_started = 0;
 182    }
 183}
 184
 185static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
 186                                          NetClientState *peer,
 187                                          bool enable)
 188{
 189    if (virtio_is_big_endian(vdev)) {
 190        return qemu_set_vnet_be(peer, enable);
 191    } else {
 192        return qemu_set_vnet_le(peer, enable);
 193    }
 194}
 195
 196static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
 197                                       int queues, bool enable)
 198{
 199    int i;
 200
 201    for (i = 0; i < queues; i++) {
 202        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
 203            enable) {
 204            while (--i >= 0) {
 205                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
 206            }
 207
 208            return true;
 209        }
 210    }
 211
 212    return false;
 213}
 214
 215static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
 216{
 217    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 218    int queues = n->multiqueue ? n->max_queues : 1;
 219
 220    if (virtio_net_started(n, status)) {
 221        /* Before using the device, we tell the network backend about the
 222         * endianness to use when parsing vnet headers. If the backend
 223         * can't do it, we fallback onto fixing the headers in the core
 224         * virtio-net code.
 225         */
 226        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
 227                                                            queues, true);
 228    } else if (virtio_net_started(n, vdev->status)) {
 229        /* After using the device, we need to reset the network backend to
 230         * the default (guest native endianness), otherwise the guest may
 231         * lose network connectivity if it is rebooted into a different
 232         * endianness.
 233         */
 234        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
 235    }
 236}
 237
 238static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
 239{
 240    unsigned int dropped = virtqueue_drop_all(vq);
 241    if (dropped) {
 242        virtio_notify(vdev, vq);
 243    }
 244}
 245
 246static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
 247{
 248    VirtIONet *n = VIRTIO_NET(vdev);
 249    VirtIONetQueue *q;
 250    int i;
 251    uint8_t queue_status;
 252
 253    virtio_net_vnet_endian_status(n, status);
 254    virtio_net_vhost_status(n, status);
 255
 256    for (i = 0; i < n->max_queues; i++) {
 257        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
 258        bool queue_started;
 259        q = &n->vqs[i];
 260
 261        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
 262            queue_status = 0;
 263        } else {
 264            queue_status = status;
 265        }
 266        queue_started =
 267            virtio_net_started(n, queue_status) && !n->vhost_started;
 268
 269        if (queue_started) {
 270            qemu_flush_queued_packets(ncs);
 271        }
 272
 273        if (!q->tx_waiting) {
 274            continue;
 275        }
 276
 277        if (queue_started) {
 278            if (q->tx_timer) {
 279                timer_mod(q->tx_timer,
 280                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
 281            } else {
 282                qemu_bh_schedule(q->tx_bh);
 283            }
 284        } else {
 285            if (q->tx_timer) {
 286                timer_del(q->tx_timer);
 287            } else {
 288                qemu_bh_cancel(q->tx_bh);
 289            }
 290            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
 291                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 292                vdev->vm_running) {
 293                /* if tx is waiting we are likely have some packets in tx queue
 294                 * and disabled notification */
 295                q->tx_waiting = 0;
 296                virtio_queue_set_notification(q->tx_vq, 1);
 297                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
 298            }
 299        }
 300    }
 301}
 302
 303static void virtio_net_set_link_status(NetClientState *nc)
 304{
 305    VirtIONet *n = qemu_get_nic_opaque(nc);
 306    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 307    uint16_t old_status = n->status;
 308
 309    if (nc->link_down)
 310        n->status &= ~VIRTIO_NET_S_LINK_UP;
 311    else
 312        n->status |= VIRTIO_NET_S_LINK_UP;
 313
 314    if (n->status != old_status)
 315        virtio_notify_config(vdev);
 316
 317    virtio_net_set_status(vdev, vdev->status);
 318}
 319
 320static void rxfilter_notify(NetClientState *nc)
 321{
 322    VirtIONet *n = qemu_get_nic_opaque(nc);
 323
 324    if (nc->rxfilter_notify_enabled) {
 325        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
 326        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
 327                                              n->netclient_name, path, &error_abort);
 328        g_free(path);
 329
 330        /* disable event notification to avoid events flooding */
 331        nc->rxfilter_notify_enabled = 0;
 332    }
 333}
 334
 335static intList *get_vlan_table(VirtIONet *n)
 336{
 337    intList *list, *entry;
 338    int i, j;
 339
 340    list = NULL;
 341    for (i = 0; i < MAX_VLAN >> 5; i++) {
 342        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
 343            if (n->vlans[i] & (1U << j)) {
 344                entry = g_malloc0(sizeof(*entry));
 345                entry->value = (i << 5) + j;
 346                entry->next = list;
 347                list = entry;
 348            }
 349        }
 350    }
 351
 352    return list;
 353}
 354
 355static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
 356{
 357    VirtIONet *n = qemu_get_nic_opaque(nc);
 358    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 359    RxFilterInfo *info;
 360    strList *str_list, *entry;
 361    int i;
 362
 363    info = g_malloc0(sizeof(*info));
 364    info->name = g_strdup(nc->name);
 365    info->promiscuous = n->promisc;
 366
 367    if (n->nouni) {
 368        info->unicast = RX_STATE_NONE;
 369    } else if (n->alluni) {
 370        info->unicast = RX_STATE_ALL;
 371    } else {
 372        info->unicast = RX_STATE_NORMAL;
 373    }
 374
 375    if (n->nomulti) {
 376        info->multicast = RX_STATE_NONE;
 377    } else if (n->allmulti) {
 378        info->multicast = RX_STATE_ALL;
 379    } else {
 380        info->multicast = RX_STATE_NORMAL;
 381    }
 382
 383    info->broadcast_allowed = n->nobcast;
 384    info->multicast_overflow = n->mac_table.multi_overflow;
 385    info->unicast_overflow = n->mac_table.uni_overflow;
 386
 387    info->main_mac = qemu_mac_strdup_printf(n->mac);
 388
 389    str_list = NULL;
 390    for (i = 0; i < n->mac_table.first_multi; i++) {
 391        entry = g_malloc0(sizeof(*entry));
 392        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
 393        entry->next = str_list;
 394        str_list = entry;
 395    }
 396    info->unicast_table = str_list;
 397
 398    str_list = NULL;
 399    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
 400        entry = g_malloc0(sizeof(*entry));
 401        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
 402        entry->next = str_list;
 403        str_list = entry;
 404    }
 405    info->multicast_table = str_list;
 406    info->vlan_table = get_vlan_table(n);
 407
 408    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
 409        info->vlan = RX_STATE_ALL;
 410    } else if (!info->vlan_table) {
 411        info->vlan = RX_STATE_NONE;
 412    } else {
 413        info->vlan = RX_STATE_NORMAL;
 414    }
 415
 416    /* enable event notification after query */
 417    nc->rxfilter_notify_enabled = 1;
 418
 419    return info;
 420}
 421
 422static void virtio_net_reset(VirtIODevice *vdev)
 423{
 424    VirtIONet *n = VIRTIO_NET(vdev);
 425
 426    /* Reset back to compatibility mode */
 427    n->promisc = 1;
 428    n->allmulti = 0;
 429    n->alluni = 0;
 430    n->nomulti = 0;
 431    n->nouni = 0;
 432    n->nobcast = 0;
 433    /* multiqueue is disabled by default */
 434    n->curr_queues = 1;
 435    timer_del(n->announce_timer);
 436    n->announce_counter = 0;
 437    n->status &= ~VIRTIO_NET_S_ANNOUNCE;
 438
 439    /* Flush any MAC and VLAN filter table state */
 440    n->mac_table.in_use = 0;
 441    n->mac_table.first_multi = 0;
 442    n->mac_table.multi_overflow = 0;
 443    n->mac_table.uni_overflow = 0;
 444    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
 445    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
 446    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 447    memset(n->vlans, 0, MAX_VLAN >> 3);
 448}
 449
 450static void peer_test_vnet_hdr(VirtIONet *n)
 451{
 452    NetClientState *nc = qemu_get_queue(n->nic);
 453    if (!nc->peer) {
 454        return;
 455    }
 456
 457    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
 458}
 459
 460static int peer_has_vnet_hdr(VirtIONet *n)
 461{
 462    return n->has_vnet_hdr;
 463}
 464
 465static int peer_has_ufo(VirtIONet *n)
 466{
 467    if (!peer_has_vnet_hdr(n))
 468        return 0;
 469
 470    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
 471
 472    return n->has_ufo;
 473}
 474
 475static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
 476                                       int version_1)
 477{
 478    int i;
 479    NetClientState *nc;
 480
 481    n->mergeable_rx_bufs = mergeable_rx_bufs;
 482
 483    if (version_1) {
 484        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 485    } else {
 486        n->guest_hdr_len = n->mergeable_rx_bufs ?
 487            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
 488            sizeof(struct virtio_net_hdr);
 489    }
 490
 491    for (i = 0; i < n->max_queues; i++) {
 492        nc = qemu_get_subqueue(n->nic, i);
 493
 494        if (peer_has_vnet_hdr(n) &&
 495            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
 496            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
 497            n->host_hdr_len = n->guest_hdr_len;
 498        }
 499    }
 500}
 501
 502static int virtio_net_max_tx_queue_size(VirtIONet *n)
 503{
 504    NetClientState *peer = n->nic_conf.peers.ncs[0];
 505
 506    /*
 507     * Backends other than vhost-user don't support max queue size.
 508     */
 509    if (!peer) {
 510        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 511    }
 512
 513    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
 514        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 515    }
 516
 517    return VIRTQUEUE_MAX_SIZE;
 518}
 519
 520static int peer_attach(VirtIONet *n, int index)
 521{
 522    NetClientState *nc = qemu_get_subqueue(n->nic, index);
 523
 524    if (!nc->peer) {
 525        return 0;
 526    }
 527
 528    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 529        vhost_set_vring_enable(nc->peer, 1);
 530    }
 531
 532    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
 533        return 0;
 534    }
 535
 536    if (n->max_queues == 1) {
 537        return 0;
 538    }
 539
 540    return tap_enable(nc->peer);
 541}
 542
 543static int peer_detach(VirtIONet *n, int index)
 544{
 545    NetClientState *nc = qemu_get_subqueue(n->nic, index);
 546
 547    if (!nc->peer) {
 548        return 0;
 549    }
 550
 551    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 552        vhost_set_vring_enable(nc->peer, 0);
 553    }
 554
 555    if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
 556        return 0;
 557    }
 558
 559    return tap_disable(nc->peer);
 560}
 561
 562static void virtio_net_set_queues(VirtIONet *n)
 563{
 564    int i;
 565    int r;
 566
 567    if (n->nic->peer_deleted) {
 568        return;
 569    }
 570
 571    for (i = 0; i < n->max_queues; i++) {
 572        if (i < n->curr_queues) {
 573            r = peer_attach(n, i);
 574            assert(!r);
 575        } else {
 576            r = peer_detach(n, i);
 577            assert(!r);
 578        }
 579    }
 580}
 581
 582static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
 583
 584static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
 585                                        Error **errp)
 586{
 587    VirtIONet *n = VIRTIO_NET(vdev);
 588    NetClientState *nc = qemu_get_queue(n->nic);
 589
 590    /* Firstly sync all virtio-net possible supported features */
 591    features |= n->host_features;
 592
 593    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 594
 595    if (!peer_has_vnet_hdr(n)) {
 596        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
 597        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 598        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 599        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
 600
 601        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
 602        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
 603        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
 604        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 605    }
 606
 607    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
 608        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
 609        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
 610    }
 611
 612    if (!get_vhost_net(nc->peer)) {
 613        return features;
 614    }
 615    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
 616    vdev->backend_features = features;
 617
 618    if (n->mtu_bypass_backend &&
 619            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
 620        features |= (1ULL << VIRTIO_NET_F_MTU);
 621    }
 622
 623    return features;
 624}
 625
 626static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
 627{
 628    uint64_t features = 0;
 629
 630    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
 631     * but also these: */
 632    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 633    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
 634    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 635    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 636    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
 637
 638    return features;
 639}
 640
 641static void virtio_net_apply_guest_offloads(VirtIONet *n)
 642{
 643    qemu_set_offload(qemu_get_queue(n->nic)->peer,
 644            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
 645            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
 646            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
 647            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
 648            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
 649}
 650
 651static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
 652{
 653    static const uint64_t guest_offloads_mask =
 654        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
 655        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 656        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 657        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
 658        (1ULL << VIRTIO_NET_F_GUEST_UFO);
 659
 660    return guest_offloads_mask & features;
 661}
 662
 663static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
 664{
 665    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 666    return virtio_net_guest_offloads_by_features(vdev->guest_features);
 667}
 668
 669static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
 670{
 671    VirtIONet *n = VIRTIO_NET(vdev);
 672    int i;
 673
 674    if (n->mtu_bypass_backend &&
 675            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
 676        features &= ~(1ULL << VIRTIO_NET_F_MTU);
 677    }
 678
 679    virtio_net_set_multiqueue(n,
 680                              virtio_has_feature(features, VIRTIO_NET_F_MQ));
 681
 682    virtio_net_set_mrg_rx_bufs(n,
 683                               virtio_has_feature(features,
 684                                                  VIRTIO_NET_F_MRG_RXBUF),
 685                               virtio_has_feature(features,
 686                                                  VIRTIO_F_VERSION_1));
 687
 688    if (n->has_vnet_hdr) {
 689        n->curr_guest_offloads =
 690            virtio_net_guest_offloads_by_features(features);
 691        virtio_net_apply_guest_offloads(n);
 692    }
 693
 694    for (i = 0;  i < n->max_queues; i++) {
 695        NetClientState *nc = qemu_get_subqueue(n->nic, i);
 696
 697        if (!get_vhost_net(nc->peer)) {
 698            continue;
 699        }
 700        vhost_net_ack_features(get_vhost_net(nc->peer), features);
 701    }
 702
 703    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
 704        memset(n->vlans, 0, MAX_VLAN >> 3);
 705    } else {
 706        memset(n->vlans, 0xff, MAX_VLAN >> 3);
 707    }
 708}
 709
 710static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
 711                                     struct iovec *iov, unsigned int iov_cnt)
 712{
 713    uint8_t on;
 714    size_t s;
 715    NetClientState *nc = qemu_get_queue(n->nic);
 716
 717    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
 718    if (s != sizeof(on)) {
 719        return VIRTIO_NET_ERR;
 720    }
 721
 722    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
 723        n->promisc = on;
 724    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
 725        n->allmulti = on;
 726    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
 727        n->alluni = on;
 728    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
 729        n->nomulti = on;
 730    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
 731        n->nouni = on;
 732    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
 733        n->nobcast = on;
 734    } else {
 735        return VIRTIO_NET_ERR;
 736    }
 737
 738    rxfilter_notify(nc);
 739
 740    return VIRTIO_NET_OK;
 741}
 742
 743static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
 744                                     struct iovec *iov, unsigned int iov_cnt)
 745{
 746    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 747    uint64_t offloads;
 748    size_t s;
 749
 750    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
 751        return VIRTIO_NET_ERR;
 752    }
 753
 754    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
 755    if (s != sizeof(offloads)) {
 756        return VIRTIO_NET_ERR;
 757    }
 758
 759    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
 760        uint64_t supported_offloads;
 761
 762        offloads = virtio_ldq_p(vdev, &offloads);
 763
 764        if (!n->has_vnet_hdr) {
 765            return VIRTIO_NET_ERR;
 766        }
 767
 768        supported_offloads = virtio_net_supported_guest_offloads(n);
 769        if (offloads & ~supported_offloads) {
 770            return VIRTIO_NET_ERR;
 771        }
 772
 773        n->curr_guest_offloads = offloads;
 774        virtio_net_apply_guest_offloads(n);
 775
 776        return VIRTIO_NET_OK;
 777    } else {
 778        return VIRTIO_NET_ERR;
 779    }
 780}
 781
 782static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
 783                                 struct iovec *iov, unsigned int iov_cnt)
 784{
 785    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 786    struct virtio_net_ctrl_mac mac_data;
 787    size_t s;
 788    NetClientState *nc = qemu_get_queue(n->nic);
 789
 790    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
 791        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
 792            return VIRTIO_NET_ERR;
 793        }
 794        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
 795        assert(s == sizeof(n->mac));
 796        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 797        rxfilter_notify(nc);
 798
 799        return VIRTIO_NET_OK;
 800    }
 801
 802    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
 803        return VIRTIO_NET_ERR;
 804    }
 805
 806    int in_use = 0;
 807    int first_multi = 0;
 808    uint8_t uni_overflow = 0;
 809    uint8_t multi_overflow = 0;
 810    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
 811
 812    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
 813                   sizeof(mac_data.entries));
 814    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
 815    if (s != sizeof(mac_data.entries)) {
 816        goto error;
 817    }
 818    iov_discard_front(&iov, &iov_cnt, s);
 819
 820    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
 821        goto error;
 822    }
 823
 824    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
 825        s = iov_to_buf(iov, iov_cnt, 0, macs,
 826                       mac_data.entries * ETH_ALEN);
 827        if (s != mac_data.entries * ETH_ALEN) {
 828            goto error;
 829        }
 830        in_use += mac_data.entries;
 831    } else {
 832        uni_overflow = 1;
 833    }
 834
 835    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
 836
 837    first_multi = in_use;
 838
 839    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
 840                   sizeof(mac_data.entries));
 841    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
 842    if (s != sizeof(mac_data.entries)) {
 843        goto error;
 844    }
 845
 846    iov_discard_front(&iov, &iov_cnt, s);
 847
 848    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
 849        goto error;
 850    }
 851
 852    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
 853        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
 854                       mac_data.entries * ETH_ALEN);
 855        if (s != mac_data.entries * ETH_ALEN) {
 856            goto error;
 857        }
 858        in_use += mac_data.entries;
 859    } else {
 860        multi_overflow = 1;
 861    }
 862
 863    n->mac_table.in_use = in_use;
 864    n->mac_table.first_multi = first_multi;
 865    n->mac_table.uni_overflow = uni_overflow;
 866    n->mac_table.multi_overflow = multi_overflow;
 867    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
 868    g_free(macs);
 869    rxfilter_notify(nc);
 870
 871    return VIRTIO_NET_OK;
 872
 873error:
 874    g_free(macs);
 875    return VIRTIO_NET_ERR;
 876}
 877
 878static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
 879                                        struct iovec *iov, unsigned int iov_cnt)
 880{
 881    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 882    uint16_t vid;
 883    size_t s;
 884    NetClientState *nc = qemu_get_queue(n->nic);
 885
 886    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
 887    vid = virtio_lduw_p(vdev, &vid);
 888    if (s != sizeof(vid)) {
 889        return VIRTIO_NET_ERR;
 890    }
 891
 892    if (vid >= MAX_VLAN)
 893        return VIRTIO_NET_ERR;
 894
 895    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
 896        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
 897    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
 898        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
 899    else
 900        return VIRTIO_NET_ERR;
 901
 902    rxfilter_notify(nc);
 903
 904    return VIRTIO_NET_OK;
 905}
 906
 907static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
 908                                      struct iovec *iov, unsigned int iov_cnt)
 909{
 910    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
 911        n->status & VIRTIO_NET_S_ANNOUNCE) {
 912        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
 913        if (n->announce_counter) {
 914            timer_mod(n->announce_timer,
 915                      qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
 916                      self_announce_delay(n->announce_counter));
 917        }
 918        return VIRTIO_NET_OK;
 919    } else {
 920        return VIRTIO_NET_ERR;
 921    }
 922}
 923
 924static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
 925                                struct iovec *iov, unsigned int iov_cnt)
 926{
 927    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 928    struct virtio_net_ctrl_mq mq;
 929    size_t s;
 930    uint16_t queues;
 931
 932    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
 933    if (s != sizeof(mq)) {
 934        return VIRTIO_NET_ERR;
 935    }
 936
 937    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
 938        return VIRTIO_NET_ERR;
 939    }
 940
 941    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
 942
 943    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
 944        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
 945        queues > n->max_queues ||
 946        !n->multiqueue) {
 947        return VIRTIO_NET_ERR;
 948    }
 949
 950    n->curr_queues = queues;
 951    /* stop the backend before changing the number of queues to avoid handling a
 952     * disabled queue */
 953    virtio_net_set_status(vdev, vdev->status);
 954    virtio_net_set_queues(n);
 955
 956    return VIRTIO_NET_OK;
 957}
 958
 959static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 960{
 961    VirtIONet *n = VIRTIO_NET(vdev);
 962    struct virtio_net_ctrl_hdr ctrl;
 963    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
 964    VirtQueueElement *elem;
 965    size_t s;
 966    struct iovec *iov, *iov2;
 967    unsigned int iov_cnt;
 968
 969    for (;;) {
 970        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 971        if (!elem) {
 972            break;
 973        }
 974        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
 975            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
 976            virtio_error(vdev, "virtio-net ctrl missing headers");
 977            virtqueue_detach_element(vq, elem, 0);
 978            g_free(elem);
 979            break;
 980        }
 981
 982        iov_cnt = elem->out_num;
 983        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
 984        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
 985        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
 986        if (s != sizeof(ctrl)) {
 987            status = VIRTIO_NET_ERR;
 988        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
 989            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
 990        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
 991            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
 992        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
 993            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
 994        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
 995            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
 996        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
 997            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
 998        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
 999            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1000        }
1001
1002        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1003        assert(s == sizeof(status));
1004
1005        virtqueue_push(vq, elem, sizeof(status));
1006        virtio_notify(vdev, vq);
1007        g_free(iov2);
1008        g_free(elem);
1009    }
1010}
1011
1012/* RX */
1013
1014static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1015{
1016    VirtIONet *n = VIRTIO_NET(vdev);
1017    int queue_index = vq2q(virtio_get_queue_index(vq));
1018
1019    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1020}
1021
1022static int virtio_net_can_receive(NetClientState *nc)
1023{
1024    VirtIONet *n = qemu_get_nic_opaque(nc);
1025    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1026    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1027
1028    if (!vdev->vm_running) {
1029        return 0;
1030    }
1031
1032    if (nc->queue_index >= n->curr_queues) {
1033        return 0;
1034    }
1035
1036    if (!virtio_queue_ready(q->rx_vq) ||
1037        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1038        return 0;
1039    }
1040
1041    return 1;
1042}
1043
1044static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1045{
1046    VirtIONet *n = q->n;
1047    if (virtio_queue_empty(q->rx_vq) ||
1048        (n->mergeable_rx_bufs &&
1049         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1050        virtio_queue_set_notification(q->rx_vq, 1);
1051
1052        /* To avoid a race condition where the guest has made some buffers
1053         * available after the above check but before notification was
1054         * enabled, check for available buffers again.
1055         */
1056        if (virtio_queue_empty(q->rx_vq) ||
1057            (n->mergeable_rx_bufs &&
1058             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1059            return 0;
1060        }
1061    }
1062
1063    virtio_queue_set_notification(q->rx_vq, 0);
1064    return 1;
1065}
1066
1067static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1068{
1069    virtio_tswap16s(vdev, &hdr->hdr_len);
1070    virtio_tswap16s(vdev, &hdr->gso_size);
1071    virtio_tswap16s(vdev, &hdr->csum_start);
1072    virtio_tswap16s(vdev, &hdr->csum_offset);
1073}
1074
1075/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1076 * it never finds out that the packets don't have valid checksums.  This
1077 * causes dhclient to get upset.  Fedora's carried a patch for ages to
1078 * fix this with Xen but it hasn't appeared in an upstream release of
1079 * dhclient yet.
1080 *
1081 * To avoid breaking existing guests, we catch udp packets and add
1082 * checksums.  This is terrible but it's better than hacking the guest
1083 * kernels.
1084 *
1085 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1086 * we should provide a mechanism to disable it to avoid polluting the host
1087 * cache.
1088 */
1089static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1090                                        uint8_t *buf, size_t size)
1091{
1092    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1093        (size > 27 && size < 1500) && /* normal sized MTU */
1094        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1095        (buf[23] == 17) && /* ip.protocol == UDP */
1096        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1097        net_checksum_calculate(buf, size);
1098        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1099    }
1100}
1101
1102static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1103                           const void *buf, size_t size)
1104{
1105    if (n->has_vnet_hdr) {
1106        /* FIXME this cast is evil */
1107        void *wbuf = (void *)buf;
1108        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1109                                    size - n->host_hdr_len);
1110
1111        if (n->needs_vnet_hdr_swap) {
1112            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1113        }
1114        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1115    } else {
1116        struct virtio_net_hdr hdr = {
1117            .flags = 0,
1118            .gso_type = VIRTIO_NET_HDR_GSO_NONE
1119        };
1120        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1121    }
1122}
1123
1124static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1125{
1126    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1127    static const uint8_t vlan[] = {0x81, 0x00};
1128    uint8_t *ptr = (uint8_t *)buf;
1129    int i;
1130
1131    if (n->promisc)
1132        return 1;
1133
1134    ptr += n->host_hdr_len;
1135
1136    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1137        int vid = lduw_be_p(ptr + 14) & 0xfff;
1138        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1139            return 0;
1140    }
1141
1142    if (ptr[0] & 1) { // multicast
1143        if (!memcmp(ptr, bcast, sizeof(bcast))) {
1144            return !n->nobcast;
1145        } else if (n->nomulti) {
1146            return 0;
1147        } else if (n->allmulti || n->mac_table.multi_overflow) {
1148            return 1;
1149        }
1150
1151        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1152            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1153                return 1;
1154            }
1155        }
1156    } else { // unicast
1157        if (n->nouni) {
1158            return 0;
1159        } else if (n->alluni || n->mac_table.uni_overflow) {
1160            return 1;
1161        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1162            return 1;
1163        }
1164
1165        for (i = 0; i < n->mac_table.first_multi; i++) {
1166            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1167                return 1;
1168            }
1169        }
1170    }
1171
1172    return 0;
1173}
1174
/*
 * Deliver one packet from the peer into the guest's RX virtqueue.
 *
 * @nc:   receiving subqueue
 * @buf:  packet data, starting with n->host_hdr_len bytes of host header
 * @size: length of @buf
 *
 * Returns:
 *   -1    if the queue cannot receive right now, if the virtqueue ran empty
 *         while popping, or if a popped element has no in buffers
 *    0    if the guest has not provided enough buffers
 *   size  if the packet was delivered, rejected by receive_filter(), or
 *         dropped because it does not fit a single non-mergeable buffer
 *
 * The caller in this file (virtio_net_receive) invokes this between
 * rcu_read_lock() and rcu_read_unlock().
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* iovec describing where num_buffers lives inside the first element */
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    /* offset: bytes of buf consumed; i: number of elements filled so far */
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    /* A filtered packet is reported as consumed */
    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            /* Running dry after the first element is a device error */
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /*
                 * Remember where num_buffers is located in guest memory;
                 * its value is only known after the loop has finished.
                 */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            /* First element: write the header, then skip the host header */
            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    /* Patch the number of elements used into the first element's header */
    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}
1278
1279static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
1280                                  size_t size)
1281{
1282    ssize_t r;
1283
1284    rcu_read_lock();
1285    r = virtio_net_receive_rcu(nc, buf, size);
1286    rcu_read_unlock();
1287    return r;
1288}
1289
1290static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
1291
1292static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
1293{
1294    VirtIONet *n = qemu_get_nic_opaque(nc);
1295    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1296    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1297
1298    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
1299    virtio_notify(vdev, q->tx_vq);
1300
1301    g_free(q->async_tx.elem);
1302    q->async_tx.elem = NULL;
1303
1304    virtio_queue_set_notification(q->tx_vq, 1);
1305    virtio_net_flush_tx(q);
1306}
1307
1308/* TX */
/*
 * Pop packets from the TX virtqueue of @q and send them to the peer.
 *
 * Returns:
 *   >= 0     number of elements processed (at most n->tx_burst); 0 when
 *            the driver is not DRIVER_OK or an async send is still pending
 *   -EINVAL  a malformed element was found; virtio_error() has been called
 *   -EBUSY   qemu_sendv_packet_async() returned 0; the element is parked
 *            in q->async_tx.elem and TX notifications are disabled (in
 *            this file virtio_net_tx_complete() is the completion callback)
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* A previous packet is still in flight; wait for its completion */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        /*
         * sg:  scratch iovec used when the host header is shorter than the
         *      guest header
         * sg2: scratch iovec used when the header has to be byte-swapped;
         *      slot 0 points at the local, swapped copy in mhdr
         */
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            /* The element must contain at least a full guest header */
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /*
                 * Swap the local copy and splice it in front of the
                 * payload, which is everything after the guest header.
                 */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                /* Payload filled every slot of sg2: drop without sending */
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Park the element until the completion callback is invoked */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        /* Sent or dropped: either way the element goes back to the guest */
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
1403
1404static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
1405{
1406    VirtIONet *n = VIRTIO_NET(vdev);
1407    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
1408
1409    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
1410        virtio_net_drop_tx_queue_data(vdev, vq);
1411        return;
1412    }
1413
1414    /* This happens when device was stopped but VCPU wasn't. */
1415    if (!vdev->vm_running) {
1416        q->tx_waiting = 1;
1417        return;
1418    }
1419
1420    if (q->tx_waiting) {
1421        virtio_queue_set_notification(vq, 1);
1422        timer_del(q->tx_timer);
1423        q->tx_waiting = 0;
1424        if (virtio_net_flush_tx(q) == -EINVAL) {
1425            return;
1426        }
1427    } else {
1428        timer_mod(q->tx_timer,
1429                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
1430        q->tx_waiting = 1;
1431        virtio_queue_set_notification(vq, 0);
1432    }
1433}
1434
1435static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
1436{
1437    VirtIONet *n = VIRTIO_NET(vdev);
1438    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
1439
1440    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
1441        virtio_net_drop_tx_queue_data(vdev, vq);
1442        return;
1443    }
1444
1445    if (unlikely(q->tx_waiting)) {
1446        return;
1447    }
1448    q->tx_waiting = 1;
1449    /* This happens when device was stopped but VCPU wasn't. */
1450    if (!vdev->vm_running) {
1451        return;
1452    }
1453    virtio_queue_set_notification(vq, 0);
1454    qemu_bh_schedule(q->tx_bh);
1455}
1456
1457static void virtio_net_tx_timer(void *opaque)
1458{
1459    VirtIONetQueue *q = opaque;
1460    VirtIONet *n = q->n;
1461    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1462    /* This happens when device was stopped but BH wasn't. */
1463    if (!vdev->vm_running) {
1464        /* Make sure tx waiting is set, so we'll run when restarted. */
1465        assert(q->tx_waiting);
1466        return;
1467    }
1468
1469    q->tx_waiting = 0;
1470
1471    /* Just in case the driver is not ready on more */
1472    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1473        return;
1474    }
1475
1476    virtio_queue_set_notification(q->tx_vq, 1);
1477    virtio_net_flush_tx(q);
1478}
1479
1480static void virtio_net_tx_bh(void *opaque)
1481{
1482    VirtIONetQueue *q = opaque;
1483    VirtIONet *n = q->n;
1484    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1485    int32_t ret;
1486
1487    /* This happens when device was stopped but BH wasn't. */
1488    if (!vdev->vm_running) {
1489        /* Make sure tx waiting is set, so we'll run when restarted. */
1490        assert(q->tx_waiting);
1491        return;
1492    }
1493
1494    q->tx_waiting = 0;
1495
1496    /* Just in case the driver is not ready on more */
1497    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
1498        return;
1499    }
1500
1501    ret = virtio_net_flush_tx(q);
1502    if (ret == -EBUSY || ret == -EINVAL) {
1503        return; /* Notification re-enable handled by tx_complete or device
1504                 * broken */
1505    }
1506
1507    /* If we flush a full burst of packets, assume there are
1508     * more coming and immediately reschedule */
1509    if (ret >= n->tx_burst) {
1510        qemu_bh_schedule(q->tx_bh);
1511        q->tx_waiting = 1;
1512        return;
1513    }
1514
1515    /* If less than a full burst, re-enable notification and flush
1516     * anything that may have come in while we weren't looking.  If
1517     * we find something, assume the guest is still active and reschedule */
1518    virtio_queue_set_notification(q->tx_vq, 1);
1519    ret = virtio_net_flush_tx(q);
1520    if (ret == -EINVAL) {
1521        return;
1522    } else if (ret > 0) {
1523        virtio_queue_set_notification(q->tx_vq, 0);
1524        qemu_bh_schedule(q->tx_bh);
1525        q->tx_waiting = 1;
1526    }
1527}
1528
1529static void virtio_net_add_queue(VirtIONet *n, int index)
1530{
1531    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1532
1533    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
1534                                           virtio_net_handle_rx);
1535
1536    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
1537        n->vqs[index].tx_vq =
1538            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
1539                             virtio_net_handle_tx_timer);
1540        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
1541                                              virtio_net_tx_timer,
1542                                              &n->vqs[index]);
1543    } else {
1544        n->vqs[index].tx_vq =
1545            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
1546                             virtio_net_handle_tx_bh);
1547        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
1548    }
1549
1550    n->vqs[index].tx_waiting = 0;
1551    n->vqs[index].n = n;
1552}
1553
1554static void virtio_net_del_queue(VirtIONet *n, int index)
1555{
1556    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1557    VirtIONetQueue *q = &n->vqs[index];
1558    NetClientState *nc = qemu_get_subqueue(n->nic, index);
1559
1560    qemu_purge_queued_packets(nc);
1561
1562    virtio_del_queue(vdev, index * 2);
1563    if (q->tx_timer) {
1564        timer_del(q->tx_timer);
1565        timer_free(q->tx_timer);
1566        q->tx_timer = NULL;
1567    } else {
1568        qemu_bh_delete(q->tx_bh);
1569        q->tx_bh = NULL;
1570    }
1571    q->tx_waiting = 0;
1572    virtio_del_queue(vdev, index * 2 + 1);
1573}
1574
1575static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
1576{
1577    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1578    int old_num_queues = virtio_get_num_queues(vdev);
1579    int new_num_queues = new_max_queues * 2 + 1;
1580    int i;
1581
1582    assert(old_num_queues >= 3);
1583    assert(old_num_queues % 2 == 1);
1584
1585    if (old_num_queues == new_num_queues) {
1586        return;
1587    }
1588
1589    /*
1590     * We always need to remove and add ctrl vq if
1591     * old_num_queues != new_num_queues. Remove ctrl_vq first,
1592     * and then we only enter one of the following too loops.
1593     */
1594    virtio_del_queue(vdev, old_num_queues - 1);
1595
1596    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
1597        /* new_num_queues < old_num_queues */
1598        virtio_net_del_queue(n, i / 2);
1599    }
1600
1601    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
1602        /* new_num_queues > old_num_queues */
1603        virtio_net_add_queue(n, i / 2);
1604    }
1605
1606    /* add ctrl_vq last */
1607    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
1608}
1609
1610static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
1611{
1612    int max = multiqueue ? n->max_queues : 1;
1613
1614    n->multiqueue = multiqueue;
1615    virtio_net_change_num_queues(n, max);
1616
1617    virtio_net_set_queues(n);
1618}
1619
1620static int virtio_net_post_load_device(void *opaque, int version_id)
1621{
1622    VirtIONet *n = opaque;
1623    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1624    int i, link_down;
1625
1626    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
1627                               virtio_vdev_has_feature(vdev,
1628                                                       VIRTIO_F_VERSION_1));
1629
1630    /* MAC_TABLE_ENTRIES may be different from the saved image */
1631    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
1632        n->mac_table.in_use = 0;
1633    }
1634
1635    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1636        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
1637    }
1638
1639    if (peer_has_vnet_hdr(n)) {
1640        virtio_net_apply_guest_offloads(n);
1641    }
1642
1643    virtio_net_set_queues(n);
1644
1645    /* Find the first multicast entry in the saved MAC filter */
1646    for (i = 0; i < n->mac_table.in_use; i++) {
1647        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
1648            break;
1649        }
1650    }
1651    n->mac_table.first_multi = i;
1652
1653    /* nc.link_down can't be migrated, so infer link_down according
1654     * to link status bit in n->status */
1655    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
1656    for (i = 0; i < n->max_queues; i++) {
1657        qemu_get_subqueue(n->nic, i)->link_down = link_down;
1658    }
1659
1660    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
1661        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
1662        n->announce_counter = SELF_ANNOUNCE_ROUNDS;
1663        timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
1664    }
1665
1666    return 0;
1667}
1668
/* tx_waiting field of a VirtIONetQueue.
 *
 * Migrates exactly one 32-bit value per queue.  It is referenced both for
 * the first queue (from vmstate_virtio_net_device) and for the remaining
 * queues (from vmstate_virtio_net_tx_waiting).
 */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
1677
1678static bool max_queues_gt_1(void *opaque, int version_id)
1679{
1680    return VIRTIO_NET(opaque)->max_queues > 1;
1681}
1682
1683static bool has_ctrl_guest_offloads(void *opaque, int version_id)
1684{
1685    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
1686                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
1687}
1688
1689static bool mac_table_fits(void *opaque, int version_id)
1690{
1691    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
1692}
1693
/* VMState field test: exact negation of mac_table_fits(). */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    if (mac_table_fits(opaque, version_id)) {
        return false;
    }
    return true;
}
1698
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;        /* device being saved or loaded */
    VirtIONetQueue *vqs_1;         /* parent->vqs + 1, see tx_waiting_pre_save */
    uint16_t        curr_queues_1; /* parent->curr_queues - 1, or 0 if that is 0 */
    uint8_t         has_ufo;       /* used by vmstate_virtio_net_has_ufo */
    uint32_t        has_vnet_hdr;  /* used by vmstate_virtio_net_has_vnet */
};
1709
1710/* The 2nd and subsequent tx_waiting flags are loaded later than
1711 * the 1st entry in the queues and only if there's more than one
1712 * entry.  We use the tmp mechanism to calculate a temporary
1713 * pointer and count and also validate the count.
1714 */
1715
1716static int virtio_net_tx_waiting_pre_save(void *opaque)
1717{
1718    struct VirtIONetMigTmp *tmp = opaque;
1719
1720    tmp->vqs_1 = tmp->parent->vqs + 1;
1721    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
1722    if (tmp->parent->curr_queues == 0) {
1723        tmp->curr_queues_1 = 0;
1724    }
1725
1726    return 0;
1727}
1728
1729static int virtio_net_tx_waiting_pre_load(void *opaque)
1730{
1731    struct VirtIONetMigTmp *tmp = opaque;
1732
1733    /* Reuse the pointer setup from save */
1734    virtio_net_tx_waiting_pre_save(opaque);
1735
1736    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
1737        error_report("virtio-net: curr_queues %x > max_queues %x",
1738            tmp->parent->curr_queues, tmp->parent->max_queues);
1739
1740        return -EINVAL;
1741    }
1742
1743    return 0; /* all good */
1744}
1745
/*
 * Migrates the tx_waiting flags of the second and later queues through
 * struct VirtIONetMigTmp: vqs_1 and curr_queues_1 are set up by the
 * pre_save/pre_load hooks, and pre_load also validates curr_queues.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queues_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
1758
1759/* the 'has_ufo' flag is just tested; if the incoming stream has the
1760 * flag set we need to check that we have it
1761 */
1762static int virtio_net_ufo_post_load(void *opaque, int version_id)
1763{
1764    struct VirtIONetMigTmp *tmp = opaque;
1765
1766    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
1767        error_report("virtio-net: saved image requires TUN_F_UFO support");
1768        return -EINVAL;
1769    }
1770
1771    return 0;
1772}
1773
1774static int virtio_net_ufo_pre_save(void *opaque)
1775{
1776    struct VirtIONetMigTmp *tmp = opaque;
1777
1778    tmp->has_ufo = tmp->parent->has_ufo;
1779
1780    return 0;
1781}
1782
/*
 * Carries the source's has_ufo flag as one byte; post_load only checks it
 * against the destination peer and does not store it in the device.
 */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
1792
1793/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
1794 * flag set we need to check that we have it
1795 */
1796static int virtio_net_vnet_post_load(void *opaque, int version_id)
1797{
1798    struct VirtIONetMigTmp *tmp = opaque;
1799
1800    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
1801        error_report("virtio-net: saved image requires vnet_hdr=on");
1802        return -EINVAL;
1803    }
1804
1805    return 0;
1806}
1807
1808static int virtio_net_vnet_pre_save(void *opaque)
1809{
1810    struct VirtIONetMigTmp *tmp = opaque;
1811
1812    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
1813
1814    return 0;
1815}
1816
/*
 * Carries the source's has_vnet_hdr flag as a 32-bit value; post_load only
 * checks it against the destination peer and does not store it in the
 * device.
 */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
1826
/*
 * Migration description of the virtio-net device state.
 *
 * Only VIRTIO_NET_VM_VERSION is accepted (version_id equals
 * minimum_version_id).  The order of the fields below is the order in
 * which they are listed for the migration stream, so entries must not be
 * reordered.  virtio_net_post_load_device() recomputes derived state after
 * all fields have been loaded.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of the first queue; later queues follow further down */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* The two queue-count fields use the max_queues_gt_1 test */
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
};
1878
1879static NetClientInfo net_virtio_info = {
1880    .type = NET_CLIENT_DRIVER_NIC,
1881    .size = sizeof(NICState),
1882    .can_receive = virtio_net_can_receive,
1883    .receive = virtio_net_receive,
1884    .link_status_changed = virtio_net_set_link_status,
1885    .query_rx_filter = virtio_net_query_rxfilter,
1886};
1887
1888static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
1889{
1890    VirtIONet *n = VIRTIO_NET(vdev);
1891    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
1892    assert(n->vhost_started);
1893    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
1894}
1895
1896static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
1897                                           bool mask)
1898{
1899    VirtIONet *n = VIRTIO_NET(vdev);
1900    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
1901    assert(n->vhost_started);
1902    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
1903                             vdev, idx, mask);
1904}
1905
1906static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
1907{
1908    int i, config_size = 0;
1909    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
1910
1911    for (i = 0; feature_sizes[i].flags != 0; i++) {
1912        if (host_features & feature_sizes[i].flags) {
1913            config_size = MAX(feature_sizes[i].end, config_size);
1914        }
1915    }
1916    n->config_size = config_size;
1917}
1918
1919void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
1920                                   const char *type)
1921{
1922    /*
1923     * The name can be NULL, the netclient name will be type.x.
1924     */
1925    assert(type != NULL);
1926
1927    g_free(n->netclient_name);
1928    g_free(n->netclient_type);
1929    n->netclient_name = g_strdup(name);
1930    n->netclient_type = g_strdup(type);
1931}
1932
1933static void virtio_net_device_realize(DeviceState *dev, Error **errp)
1934{
1935    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1936    VirtIONet *n = VIRTIO_NET(dev);
1937    NetClientState *nc;
1938    int i;
1939
1940    if (n->net_conf.mtu) {
1941        n->host_features |= (0x1 << VIRTIO_NET_F_MTU);
1942    }
1943
1944    virtio_net_set_config_size(n, n->host_features);
1945    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
1946
1947    /*
1948     * We set a lower limit on RX queue size to what it always was.
1949     * Guests that want a smaller ring can always resize it without
1950     * help from us (using virtio 1 and up).
1951     */
1952    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
1953        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
1954        !is_power_of_2(n->net_conf.rx_queue_size)) {
1955        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
1956                   "must be a power of 2 between %d and %d.",
1957                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
1958                   VIRTQUEUE_MAX_SIZE);
1959        virtio_cleanup(vdev);
1960        return;
1961    }
1962
1963    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
1964        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
1965        !is_power_of_2(n->net_conf.tx_queue_size)) {
1966        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
1967                   "must be a power of 2 between %d and %d",
1968                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
1969                   VIRTQUEUE_MAX_SIZE);
1970        virtio_cleanup(vdev);
1971        return;
1972    }
1973
1974    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
1975    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
1976        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
1977                   "must be a positive integer less than %d.",
1978                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
1979        virtio_cleanup(vdev);
1980        return;
1981    }
1982    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
1983    n->curr_queues = 1;
1984    n->tx_timeout = n->net_conf.txtimer;
1985
1986    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
1987                       && strcmp(n->net_conf.tx, "bh")) {
1988        error_report("virtio-net: "
1989                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
1990                     n->net_conf.tx);
1991        error_report("Defaulting to \"bh\"");
1992    }
1993
1994    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
1995                                    n->net_conf.tx_queue_size);
1996
1997    for (i = 0; i < n->max_queues; i++) {
1998        virtio_net_add_queue(n, i);
1999    }
2000
2001    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2002    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
2003    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
2004    n->status = VIRTIO_NET_S_LINK_UP;
2005    n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
2006                                     virtio_net_announce_timer, n);
2007
2008    if (n->netclient_type) {
2009        /*
2010         * Happen when virtio_net_set_netclient_name has been called.
2011         */
2012        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2013                              n->netclient_type, n->netclient_name, n);
2014    } else {
2015        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2016                              object_get_typename(OBJECT(dev)), dev->id, n);
2017    }
2018
2019    peer_test_vnet_hdr(n);
2020    if (peer_has_vnet_hdr(n)) {
2021        for (i = 0; i < n->max_queues; i++) {
2022            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
2023        }
2024        n->host_hdr_len = sizeof(struct virtio_net_hdr);
2025    } else {
2026        n->host_hdr_len = 0;
2027    }
2028
2029    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
2030
2031    n->vqs[0].tx_waiting = 0;
2032    n->tx_burst = n->net_conf.txburst;
2033    virtio_net_set_mrg_rx_bufs(n, 0, 0);
2034    n->promisc = 1; /* for compatibility */
2035
2036    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
2037
2038    n->vlans = g_malloc0(MAX_VLAN >> 3);
2039
2040    nc = qemu_get_queue(n->nic);
2041    nc->rxfilter_notify_enabled = 1;
2042
2043    n->qdev = dev;
2044}
2045
2046static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
2047{
2048    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2049    VirtIONet *n = VIRTIO_NET(dev);
2050    int i, max_queues;
2051
2052    /* This will stop vhost backend if appropriate. */
2053    virtio_net_set_status(vdev, 0);
2054
2055    g_free(n->netclient_name);
2056    n->netclient_name = NULL;
2057    g_free(n->netclient_type);
2058    n->netclient_type = NULL;
2059
2060    g_free(n->mac_table.macs);
2061    g_free(n->vlans);
2062
2063    max_queues = n->multiqueue ? n->max_queues : 1;
2064    for (i = 0; i < max_queues; i++) {
2065        virtio_net_del_queue(n, i);
2066    }
2067
2068    timer_del(n->announce_timer);
2069    timer_free(n->announce_timer);
2070    g_free(n->vqs);
2071    qemu_del_nic(n->nic);
2072    virtio_cleanup(vdev);
2073}
2074
2075static void virtio_net_instance_init(Object *obj)
2076{
2077    VirtIONet *n = VIRTIO_NET(obj);
2078
2079    /*
2080     * The default config_size is sizeof(struct virtio_net_config).
2081     * Can be overriden with virtio_net_set_config_size.
2082     */
2083    n->config_size = sizeof(struct virtio_net_config);
2084    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
2085                                  "bootindex", "/ethernet-phy@0",
2086                                  DEVICE(n), NULL);
2087}
2088
2089static int virtio_net_pre_save(void *opaque)
2090{
2091    VirtIONet *n = opaque;
2092
2093    /* At this point, backend must be stopped, otherwise
2094     * it might keep writing to memory. */
2095    assert(!n->vhost_started);
2096
2097    return 0;
2098}
2099
2100static const VMStateDescription vmstate_virtio_net = {
2101    .name = "virtio-net",
2102    .minimum_version_id = VIRTIO_NET_VM_VERSION,
2103    .version_id = VIRTIO_NET_VM_VERSION,
2104    .fields = (VMStateField[]) {
2105        VMSTATE_VIRTIO_DEVICE,
2106        VMSTATE_END_OF_LIST()
2107    },
2108    .pre_save = virtio_net_pre_save,
2109};
2110
2111static Property virtio_net_properties[] = {
2112    DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true),
2113    DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features,
2114                    VIRTIO_NET_F_GUEST_CSUM, true),
2115    DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
2116    DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features,
2117                    VIRTIO_NET_F_GUEST_TSO4, true),
2118    DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features,
2119                    VIRTIO_NET_F_GUEST_TSO6, true),
2120    DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features,
2121                    VIRTIO_NET_F_GUEST_ECN, true),
2122    DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features,
2123                    VIRTIO_NET_F_GUEST_UFO, true),
2124    DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features,
2125                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
2126    DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features,
2127                    VIRTIO_NET_F_HOST_TSO4, true),
2128    DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features,
2129                    VIRTIO_NET_F_HOST_TSO6, true),
2130    DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features,
2131                    VIRTIO_NET_F_HOST_ECN, true),
2132    DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features,
2133                    VIRTIO_NET_F_HOST_UFO, true),
2134    DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features,
2135                    VIRTIO_NET_F_MRG_RXBUF, true),
2136    DEFINE_PROP_BIT("status", VirtIONet, host_features,
2137                    VIRTIO_NET_F_STATUS, true),
2138    DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features,
2139                    VIRTIO_NET_F_CTRL_VQ, true),
2140    DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features,
2141                    VIRTIO_NET_F_CTRL_RX, true),
2142    DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features,
2143                    VIRTIO_NET_F_CTRL_VLAN, true),
2144    DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features,
2145                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
2146    DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features,
2147                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
2148    DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features,
2149                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
2150    DEFINE_PROP_BIT("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
2151    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
2152    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
2153                       TX_TIMER_INTERVAL),
2154    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
2155    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
2156    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
2157                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
2158    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
2159                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
2160    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
2161    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
2162                     true),
2163    DEFINE_PROP_END_OF_LIST(),
2164};
2165
2166static void virtio_net_class_init(ObjectClass *klass, void *data)
2167{
2168    DeviceClass *dc = DEVICE_CLASS(klass);
2169    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2170
2171    dc->props = virtio_net_properties;
2172    dc->vmsd = &vmstate_virtio_net;
2173    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2174    vdc->realize = virtio_net_device_realize;
2175    vdc->unrealize = virtio_net_device_unrealize;
2176    vdc->get_config = virtio_net_get_config;
2177    vdc->set_config = virtio_net_set_config;
2178    vdc->get_features = virtio_net_get_features;
2179    vdc->set_features = virtio_net_set_features;
2180    vdc->bad_features = virtio_net_bad_features;
2181    vdc->reset = virtio_net_reset;
2182    vdc->set_status = virtio_net_set_status;
2183    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
2184    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
2185    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
2186    vdc->vmsd = &vmstate_virtio_net_device;
2187}
2188
2189static const TypeInfo virtio_net_info = {
2190    .name = TYPE_VIRTIO_NET,
2191    .parent = TYPE_VIRTIO_DEVICE,
2192    .instance_size = sizeof(VirtIONet),
2193    .instance_init = virtio_net_instance_init,
2194    .class_init = virtio_net_class_init,
2195};
2196
2197static void virtio_register_types(void)
2198{
2199    type_register_static(&virtio_net_info);
2200}
2201
2202type_init(virtio_register_types)
2203