qemu/hw/net/virtio-net.c
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Interval of the timer that purges coalesced packets.  This value affects
   performance a lot and should be tuned carefully: 300000 (300us) is the
   recommended value to pass the WHQL test, while 50000 can gain 2x netperf
   throughput with tso/gso/gro off. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* temporary until the standard headers include it */
#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)

#define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
#define VIRTIO_NET_F_RSC_EXT       61

static inline __virtio16 *virtio_net_rsc_ext_num_packets(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_start;
}

static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_offset;
}

#endif

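/*
 * Each optional config field only exists when its feature is offered;
 * virtio_feature_get_config_size() walks this table (from
 * virtio_net_set_config_size(), later in this file) and sizes the
 * config space to the largest .end whose feature bit is set, so e.g.
 * offering VIRTIO_NET_F_MTU but not SPEED_DUPLEX ends the config
 * space right after the mtu field.
 */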
static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

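/*
 * Data virtqueues come in RX/TX pairs: vq 0/1 serve queue pair 0,
 * vq 2/3 serve queue pair 1, and so on, with the control vq after all
 * of the data queues.  vq2q() maps a virtqueue index back to its
 * queue pair, e.g. vq2q(5) == 2.
 */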
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    memcpy(config, &netcfg, n->config_size);
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement instead, so that we do
     * not cause confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%u-byte MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

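/*
 * Propagate a new device status to every queue: queues beyond
 * curr_queues (or any queue but 0 when multiqueue is off) see a
 * status of 0, and pending TX work is rescheduled or cancelled to
 * match.
 */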
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and notification disabled */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

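/*
 * n->vlans is a bitmap of MAX_VLAN bits packed into 32-bit words:
 * VLAN id v lives at bit (v & 0x1f) of word v >> 5 (see
 * virtio_net_handle_vlan_table()).  Walk it and build the intList
 * that the QMP rx-filter query expects.
 */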
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

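/*
 * The guest-visible header is the 10-byte struct virtio_net_hdr unless
 * mergeable RX buffers or VIRTIO_F_VERSION_1 was negotiated, in which
 * case the 12-byte virtio_net_hdr_mrg_rxbuf layout (which appends
 * num_buffers) is used.  When the peer can handle that length natively
 * we adopt it as the host header length too, so RX needs no rewrite.
 */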
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all features that virtio-net could possibly support */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

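/*
 * Push the currently negotiated GUEST_* offload bits down to the peer
 * as qemu_set_offload() flags (rx csum, tso4, tso6, ecn, ufo).
 */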
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;

    if (n->primary_dev) {
        return;
    }

    n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
                                            n->primary_device_id);
    if (n->primary_device_opts) {
        n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
        if (err) {
            qemu_opts_del(n->primary_device_opts);
        }
        if (n->primary_dev) {
            n->primary_bus = n->primary_dev->parent_bus;
            if (err) {
                qdev_unplug(n->primary_dev, &err);
                qdev_set_id(n->primary_dev, "");
            }
        }
    } else {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure the primary device has the parameter"
                          " failover_pair_id=<virtio-net-id>\n");
    }
    if (err) {
        error_propagate(errp, err);
    }
}

static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
{
    VirtIONet *n = opaque;
    int ret = 0;

    const char *standby_id = qemu_opt_get(opts, "failover_pair_id");

    if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
        n->primary_device_id = g_strdup(opts->id);
        ret = 1;
    }

    return ret;
}

static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
{
    DeviceState *dev = NULL;
    Error *err = NULL;

    if (qemu_opts_foreach(qemu_find_opts("device"),
                          is_my_primary, n, &err)) {
        if (err) {
            error_propagate(errp, err);
            return NULL;
        }
        if (n->primary_device_id) {
            dev = qdev_find_recursive(sysbus_get_default(),
                                      n->primary_device_id);
        } else {
            error_setg(errp, "Primary device id not found");
            return NULL;
        }
    }
    return dev;
}

static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
                                                    DeviceState *dev,
                                                    Error **errp)
{
    DeviceState *prim_dev = NULL;
    Error *err = NULL;

    prim_dev = virtio_net_find_primary(n, &err);
    if (prim_dev) {
        n->primary_device_id = g_strdup(prim_dev->id);
        n->primary_device_opts = prim_dev->opts;
    } else {
        if (err) {
            error_propagate(errp, err);
        }
    }

    return prim_dev;
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        atomic_set(&n->primary_should_be_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
            if (err) {
                goto out_err;
            }
            failover_add_primary(n, &err);
            if (err) {
                goto out_err;
            }
        }
    }
    return;

out_err:
    if (err) {
        warn_report_err(err);
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

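/*
 * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac blocks
 * back to back, unicast first and then multicast, each a 32-bit entry
 * count followed by that many 6-byte MAC addresses.
 */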
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* Stop the backend before changing the number of queues to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

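/*
 * Each control command arrives as one descriptor chain: the out sg
 * holds a struct virtio_net_ctrl_hdr (class + cmd) followed by
 * command-specific data, and the in sg receives the one-byte ack.
 */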
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg,
                              sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

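/*
 * The virtio_net_hdr multi-byte fields are guest-endian; when the
 * backend cannot be told the guest's endianness (needs_vnet_hdr_swap),
 * they are byte-swapped here before handing packets across.
 */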
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
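            /* Remember where num_buffers sits in the first element so
             * the final buffer count can be patched in once the whole
             * packet has been copied (see the end of this function). */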
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size);
}

static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
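    /* The TCP data offset is the top 4 bits of th_offset_flags, counted
     * in 32-bit words: mask with 0xF000, shift right by 12, then
     * multiply by 4 -- hence the combined >> 10. */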
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                 + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                        + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between the payload length in IPv4 and v6:
       the IP header is excluded in IPv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}

static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr *h;

    h = (struct virtio_net_hdr *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        *virtio_net_rsc_ext_num_packets(h) = seg->packets;
        *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}

static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

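/*
 * Allocate the segment buffer at its maximum coalesced size up front
 * so virtio_net_rsc_coalesce_data() can append payload in place
 * without reallocating.
 */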
1605static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1606                                     NetClientState *nc,
1607                                     const uint8_t *buf, size_t size)
1608{
1609    uint16_t hdr_len;
1610    VirtioNetRscSeg *seg;
1611
1612    hdr_len = chain->n->guest_hdr_len;
1613    seg = g_malloc(sizeof(VirtioNetRscSeg));
1614    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1615        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1616    memcpy(seg->buf, buf, size);
1617    seg->size = size;
1618    seg->packets = 1;
1619    seg->dup_ack = 0;
1620    seg->is_coalesced = 0;
1621    seg->nc = nc;
1622
1623    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1624    chain->stat.cache++;
1625
1626    switch (chain->proto) {
1627    case ETH_P_IP:
1628        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1629        break;
1630    case ETH_P_IPV6:
1631        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1632        break;
1633    default:
1634        g_assert_not_reached();
1635    }
1636}
1637
1638static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1639                                         VirtioNetRscSeg *seg,
1640                                         const uint8_t *buf,
1641                                         struct tcp_header *n_tcp,
1642                                         struct tcp_header *o_tcp)
1643{
1644    uint32_t nack, oack;
1645    uint16_t nwin, owin;
1646
1647    nack = htonl(n_tcp->th_ack);
1648    nwin = htons(n_tcp->th_win);
1649    oack = htonl(o_tcp->th_ack);
1650    owin = htons(o_tcp->th_win);
1651
1652    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1653        chain->stat.ack_out_of_win++;
1654        return RSC_FINAL;
1655    } else if (nack == oack) {
1656        /* duplicated ack or window probe */
1657        if (nwin == owin) {
1658            /* duplicated ack, add dup ack count due to whql test up to 1 */
1659            chain->stat.dup_ack++;
1660            return RSC_FINAL;
1661        } else {
1662            /* Coalesce window update */
1663            o_tcp->th_win = n_tcp->th_win;
1664            chain->stat.win_update++;
1665            return RSC_COALESCE;
1666        }
1667    } else {
1668        /* pure ack, go to 'C', finalize*/
1669        chain->stat.pure_ack++;
1670        return RSC_FINAL;
1671    }
1672}
1673
1674static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1675                                            VirtioNetRscSeg *seg,
1676                                            const uint8_t *buf,
1677                                            VirtioNetRscUnit *n_unit)
1678{
1679    void *data;
1680    uint16_t o_ip_len;
1681    uint32_t nseq, oseq;
1682    VirtioNetRscUnit *o_unit;
1683
1684    o_unit = &seg->unit;
1685    o_ip_len = ntohs(*o_unit->ip_plen);
1686    nseq = ntohl(n_unit->tcp->th_seq);
1687    oseq = ntohl(o_unit->tcp->th_seq);
1688
1689    /* out of order or retransmitted. */
1690    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1691        chain->stat.data_out_of_win++;
1692        return RSC_FINAL;
1693    }
1694
1695    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1696    if (nseq == oseq) {
1697        if ((o_unit->payload == 0) && n_unit->payload) {
1698            /* From no payload to payload: the normal data case, not a dup ack */
1699            chain->stat.data_after_pure_ack++;
1700            goto coalesce;
1701        } else {
1702            return virtio_net_rsc_handle_ack(chain, seg, buf,
1703                                             n_unit->tcp, o_unit->tcp);
1704        }
1705    } else if ((nseq - oseq) != o_unit->payload) {
1706        /* Not a consistent packet, out of order */
1707        chain->stat.data_out_of_order++;
1708        return RSC_FINAL;
1709    } else {
1710coalesce:
1711        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
1712            chain->stat.over_size++;
1713            return RSC_FINAL;
1714        }
1715
1716        /* The data is contiguous; the payload length field differs between
1717           v4 and v6, so update it through the unit's ip_plen pointer */
1718        o_unit->payload += n_unit->payload; /* update new data len */
1719
1720        /* update field in ip header */
1721        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
1722
1723        /* Carry over the latest flags (notably 'PSH'): the WHQL test guide
1724           says 'PSH' may be coalesced for Windows guests, though this can
1725           change behavior for a Linux guest (only if it uses the RSC feature). */
1726        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
1727
1728        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
1729        o_unit->tcp->th_win = n_unit->tcp->th_win;
1730
1731        memmove(seg->buf + seg->size, data, n_unit->payload);
1732        seg->size += n_unit->payload;
1733        seg->packets++;
1734        chain->stat.coalesced++;
1735        return RSC_COALESCE;
1736    }
1737}
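
    /*
     * The contiguity test above, with illustrative numbers: a segment is
     * appendable exactly when its sequence number continues the cached
     * payload.
     *
     *     oseq = 5000, o_unit->payload = 1460    cached bytes 5000..6459
     *     nseq = 6460    6460 - 5000 == 1460 -> append ("coalesce" label)
     *     nseq = 7920    gap -> data_out_of_order, RSC_FINAL
     *     nseq = 5000    retransmit or ack, handled by the paths above
     *
     * As with the ack check, (nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD
     * catches sequence numbers behind the cached one via unsigned
     * wraparound.
     */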
1738
1739static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
1740                                        VirtioNetRscSeg *seg,
1741                                        const uint8_t *buf, size_t size,
1742                                        VirtioNetRscUnit *unit)
1743{
1744    struct ip_header *ip1, *ip2;
1745
1746    ip1 = (struct ip_header *)(unit->ip);
1747    ip2 = (struct ip_header *)(seg->unit.ip);
1748    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
1749        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1750        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1751        chain->stat.no_match++;
1752        return RSC_NO_MATCH;
1753    }
1754
1755    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1756}
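
    /*
     * The XOR comparisons above are a terse way to say "differs": for two
     * equal-width integers, (a ^ b) is nonzero iff a != b, so no byte-order
     * conversion is needed just to test equality, e.g.
     *
     *     if (ip1->ip_src ^ ip2->ip_src)    same as ip_src != ip_src
     *
     * The IPv6 variant below falls back to memcmp() for the 128-bit
     * addresses, which don't fit an integer XOR.
     */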
1757
1758static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
1759                                        VirtioNetRscSeg *seg,
1760                                        const uint8_t *buf, size_t size,
1761                                        VirtioNetRscUnit *unit)
1762{
1763    struct ip6_header *ip1, *ip2;
1764
1765    ip1 = (struct ip6_header *)(unit->ip);
1766    ip2 = (struct ip6_header *)(seg->unit.ip);
1767    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
1768        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
1769        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1770        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1771        chain->stat.no_match++;
1772        return RSC_NO_MATCH;
1773    }
1774
1775    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1776}
1777
1778/* Packets with 'SYN' bypass coalescing; packets with other control flags
1779 * are delivered only after a drain, to prevent reordering */
1780static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
1781                                         struct tcp_header *tcp)
1782{
1783    uint16_t tcp_hdr;
1784    uint16_t tcp_flag;
1785
1786    tcp_flag = ntohs(tcp->th_offset_flags);
1787    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
1788    tcp_flag &= VIRTIO_NET_TCP_FLAG;
1790    if (tcp_flag & TH_SYN) {
1791        chain->stat.tcp_syn++;
1792        return RSC_BYPASS;
1793    }
1794
1795    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
1796        chain->stat.tcp_ctrl_drain++;
1797        return RSC_FINAL;
1798    }
1799
1800    if (tcp_hdr > sizeof(struct tcp_header)) {
1801        chain->stat.tcp_all_opt++;
1802        return RSC_FINAL;
1803    }
1804
1805    return RSC_CANDIDATE;
1806}
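
    /*
     * How the header length falls out of th_offset_flags (a sketch): the
     * data offset occupies the top 4 bits and counts 32-bit words, so
     *
     *     tcp_hdr = ((flags & 0xF000) >> 12) << 2;    words -> bytes
     *
     * which is the single ">> 10" above.  E.g. flags 0x5018 (offset 5,
     * ACK|PSH) give 5 << 2 == 20 bytes, the minimal header, so the segment
     * remains a candidate; any larger offset means TCP options and forces
     * a drain.
     */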
1807
1808static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
1809                                         NetClientState *nc,
1810                                         const uint8_t *buf, size_t size,
1811                                         VirtioNetRscUnit *unit)
1812{
1813    int ret;
1814    VirtioNetRscSeg *seg, *nseg;
1815
1816    if (QTAILQ_EMPTY(&chain->buffers)) {
1817        chain->stat.empty_cache++;
1818        virtio_net_rsc_cache_buf(chain, nc, buf, size);
1819        timer_mod(chain->drain_timer,
1820              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1821        return size;
1822    }
1823
1824    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1825        if (chain->proto == ETH_P_IP) {
1826            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
1827        } else {
1828            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
1829        }
1830
1831        if (ret == RSC_FINAL) {
1832            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1833                /* Send failed */
1834                chain->stat.final_failed++;
1835                return 0;
1836            }
1837
1838            /* Send current packet */
1839            return virtio_net_do_receive(nc, buf, size);
1840        } else if (ret == RSC_NO_MATCH) {
1841            continue;
1842        } else {
1843            /* Coalesced; set the flag so the IPv4 checksum is recalculated */
1844            seg->is_coalesced = 1;
1845            return size;
1846        }
1847    }
1848
1849    chain->stat.no_match_cache++;
1850    virtio_net_rsc_cache_buf(chain, nc, buf, size);
1851    return size;
1852}
1853
1854/* Drain a connection's cached data, to avoid delivering segments out of order */
1855static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
1856                                        NetClientState *nc,
1857                                        const uint8_t *buf, size_t size,
1858                                        uint16_t ip_start, uint16_t ip_size,
1859                                        uint16_t tcp_port)
1860{
1861    VirtioNetRscSeg *seg, *nseg;
1862    uint32_t ppair1, ppair2;
1863
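        /* both TCP ports are compared with a single 32-bit load */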
1864    ppair1 = *(uint32_t *)(buf + tcp_port);
1865    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1866        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
1867        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
1868            || (ppair1 != ppair2)) {
1869            continue;
1870        }
1871        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1872            chain->stat.drain_failed++;
1873        }
1874
1875        break;
1876    }
1877
1878    return virtio_net_do_receive(nc, buf, size);
1879}
1880
1881static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
1882                                            struct ip_header *ip,
1883                                            const uint8_t *buf, size_t size)
1884{
1885    uint16_t ip_len;
1886
1887    /* Not an ipv4 packet (note: this reuses the ip_option counter) */
1888    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
1889        chain->stat.ip_option++;
1890        return RSC_BYPASS;
1891    }
1892
1893    /* Don't handle packets with ip option */
1894    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
1895        chain->stat.ip_option++;
1896        return RSC_BYPASS;
1897    }
1898
1899    if (ip->ip_p != IPPROTO_TCP) {
1900        chain->stat.bypass_not_tcp++;
1901        return RSC_BYPASS;
1902    }
1903
1904    /* Don't handle packets that may be fragmented (DF bit not set) */
1905    if (!(ntohs(ip->ip_off) & IP_DF)) {
1906        chain->stat.ip_frag++;
1907        return RSC_BYPASS;
1908    }
1909
1910    /* Don't handle packets with ecn flag */
1911    if (IPTOS_ECN(ip->ip_tos)) {
1912        chain->stat.ip_ecn++;
1913        return RSC_BYPASS;
1914    }
1915
1916    ip_len = ntohs(ip->ip_len);
1917    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
1918        || ip_len > (size - chain->n->guest_hdr_len -
1919                     sizeof(struct eth_header))) {
1920        chain->stat.ip_hacked++;
1921        return RSC_BYPASS;
1922    }
1923
1924    return RSC_CANDIDATE;
1925}
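
    /*
     * The ip_len bounds above, spelled out: the total length claimed by
     * the IP header must cover at least minimal IP + TCP headers, and must
     * not claim more bytes than actually follow the Ethernet header:
     *
     *     40 <= ip_len <= size - guest_hdr_len - 14
     *
     * (20-byte IPv4 header plus 20-byte TCP header on the low side, the
     * 14-byte Ethernet header on the high side.)  Anything else is
     * malformed or truncated and is bypassed as "hacked".
     */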
1926
1927static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
1928                                      NetClientState *nc,
1929                                      const uint8_t *buf, size_t size)
1930{
1931    int32_t ret;
1932    uint16_t hdr_len;
1933    VirtioNetRscUnit unit;
1934
1935    hdr_len = chain->n->guest_hdr_len;
1936
1937    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
1938        + sizeof(struct tcp_header))) {
1939        chain->stat.bypass_not_tcp++;
1940        return virtio_net_do_receive(nc, buf, size);
1941    }
1942
1943    virtio_net_rsc_extract_unit4(chain, buf, &unit);
1944    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
1945        != RSC_CANDIDATE) {
1946        return virtio_net_do_receive(nc, buf, size);
1947    }
1948
1949    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1950    if (ret == RSC_BYPASS) {
1951        return virtio_net_do_receive(nc, buf, size);
1952    } else if (ret == RSC_FINAL) {
1953        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1954                ((hdr_len + sizeof(struct eth_header)) + 12),
1955                VIRTIO_NET_IP4_ADDR_SIZE,
1956                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
1957    }
1958
1959    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1960}
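
    /*
     * The bare "12" passed to virtio_net_rsc_drain_flow() above is the
     * offset of the source address within the IPv4 header, i.e. it could
     * be written as offsetof(struct ip_header, ip_src): saddr and daddr
     * are the last 8 bytes of the 20-byte header, compared as one block of
     * VIRTIO_NET_IP4_ADDR_SIZE bytes.  The IPv6 path below uses "8"
     * (offset of ip6_src) the same way, with a 32-byte compare.
     */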
1961
1962static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
1963                                            struct ip6_header *ip6,
1964                                            const uint8_t *buf, size_t size)
1965{
1966    uint16_t ip_len;
1967
1968    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
1969        != IP_HEADER_VERSION_6) {
1970        return RSC_BYPASS;
1971    }
1972
1973    /* Extension headers and non-TCP protocols are both rejected by this */
1974    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
1975        chain->stat.bypass_not_tcp++;
1976        return RSC_BYPASS;
1977    }
1978
1979    ip_len = ntohs(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1980    if (ip_len < sizeof(struct tcp_header) ||
1981        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
1982                  - sizeof(struct ip6_header))) {
1983        chain->stat.ip_hacked++;
1984        return RSC_BYPASS;
1985    }
1986
1987    /* Don't handle packets with ecn flag */
1988    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
1989        chain->stat.ip_ecn++;
1990        return RSC_BYPASS;
1991    }
1992
1993    return RSC_CANDIDATE;
1994}
1995
1996static size_t virtio_net_rsc_receive6(VirtioNetRscChain *chain,
1997                                      NetClientState *nc,
1998                                      const uint8_t *buf, size_t size)
1999{
2000    int32_t ret;
2001    uint16_t hdr_len;
2002    VirtioNetRscUnit unit;
2003
2004    hdr_len = chain->n->guest_hdr_len;
2005
2006    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2007        + sizeof(struct tcp_header))) {
2008        return virtio_net_do_receive(nc, buf, size);
2009    }
2011
2012    virtio_net_rsc_extract_unit6(chain, buf, &unit);
2013    if (virtio_net_rsc_sanity_check6(chain, unit.ip, buf, size)
2014        != RSC_CANDIDATE) {
2015        return virtio_net_do_receive(nc, buf, size);
2016    }
2017
2018    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2019    if (ret == RSC_BYPASS) {
2020        return virtio_net_do_receive(nc, buf, size);
2021    } else if (ret == RSC_FINAL) {
2022        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2023                ((hdr_len + sizeof(struct eth_header)) + 8),
2024                VIRTIO_NET_IP6_ADDR_SIZE,
2025                hdr_len + sizeof(struct eth_header)
2026                + sizeof(struct ip6_header));
2027    }
2028
2029    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2030}
2031
2032static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2033                                                      NetClientState *nc,
2034                                                      uint16_t proto)
2035{
2036    VirtioNetRscChain *chain;
2037
2038    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2039        return NULL;
2040    }
2041
2042    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2043        if (chain->proto == proto) {
2044            return chain;
2045        }
2046    }
2047
2048    chain = g_malloc(sizeof(*chain));
2049    chain->n = n;
2050    chain->proto = proto;
2051    if (proto == (uint16_t)ETH_P_IP) {
2052        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2053        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2054    } else {
2055        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2056        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2057    }
2058    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2059                                      virtio_net_rsc_purge, chain);
2060    memset(&chain->stat, 0, sizeof(chain->stat));
2061
2062    QTAILQ_INIT(&chain->buffers);
2063    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2064
2065    return chain;
2066}
2067
2068static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2069                                      const uint8_t *buf,
2070                                      size_t size)
2071{
2072    uint16_t proto;
2073    VirtioNetRscChain *chain;
2074    struct eth_header *eth;
2075    VirtIONet *n;
2076
2077    n = qemu_get_nic_opaque(nc);
2078    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2079        return virtio_net_do_receive(nc, buf, size);
2080    }
2081
2082    eth = (struct eth_header *)(buf + n->guest_hdr_len);
2083    proto = ntohs(eth->h_proto);
2084
2085    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2086    if (chain) {
2087        chain->stat.received++;
2088        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2089            return virtio_net_rsc_receive4(chain, nc, buf, size);
2090        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2091            return virtio_net_rsc_receive6(chain, nc, buf, size);
2092        }
2093    }
2094    return virtio_net_do_receive(nc, buf, size);
2095}
2096
2097static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2098                                  size_t size)
2099{
2100    VirtIONet *n = qemu_get_nic_opaque(nc);
2101    if ((n->rsc4_enabled || n->rsc6_enabled)) {
2102        return virtio_net_rsc_receive(nc, buf, size);
2103    } else {
2104        return virtio_net_do_receive(nc, buf, size);
2105    }
2106}
2107
2108static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2109
2110static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2111{
2112    VirtIONet *n = qemu_get_nic_opaque(nc);
2113    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2114    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2115
2116    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2117    virtio_notify(vdev, q->tx_vq);
2118
2119    g_free(q->async_tx.elem);
2120    q->async_tx.elem = NULL;
2121
2122    virtio_queue_set_notification(q->tx_vq, 1);
2123    virtio_net_flush_tx(q);
2124}
2125
2126/* TX */
2127static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2128{
2129    VirtIONet *n = q->n;
2130    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2131    VirtQueueElement *elem;
2132    int32_t num_packets = 0;
2133    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2134    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2135        return num_packets;
2136    }
2137
2138    if (q->async_tx.elem) {
2139        virtio_queue_set_notification(q->tx_vq, 0);
2140        return num_packets;
2141    }
2142
2143    for (;;) {
2144        ssize_t ret;
2145        unsigned int out_num;
2146        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2147        struct virtio_net_hdr_mrg_rxbuf mhdr;
2148
2149        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2150        if (!elem) {
2151            break;
2152        }
2153
2154        out_num = elem->out_num;
2155        out_sg = elem->out_sg;
2156        if (out_num < 1) {
2157            virtio_error(vdev, "virtio-net header not in first element");
2158            virtqueue_detach_element(q->tx_vq, elem, 0);
2159            g_free(elem);
2160            return -EINVAL;
2161        }
2162
2163        if (n->has_vnet_hdr) {
2164            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2165                n->guest_hdr_len) {
2166                virtio_error(vdev, "virtio-net header incorrect");
2167                virtqueue_detach_element(q->tx_vq, elem, 0);
2168                g_free(elem);
2169                return -EINVAL;
2170            }
2171            if (n->needs_vnet_hdr_swap) {
2172                virtio_net_hdr_swap(vdev, (void *) &mhdr);
2173                sg2[0].iov_base = &mhdr;
2174                sg2[0].iov_len = n->guest_hdr_len;
2175                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2176                                   out_sg, out_num,
2177                                   n->guest_hdr_len, -1);
2178                if (out_num == VIRTQUEUE_MAX_SIZE) {
2179                    goto drop;
2180                }
2181                out_num += 1;
2182                out_sg = sg2;
2183            }
2184        }
2185        /*
2186         * If host wants to see the guest header as is, we can
2187         * pass it on unchanged. Otherwise, copy just the parts
2188         * that host is interested in.
2189         */
2190        assert(n->host_hdr_len <= n->guest_hdr_len);
2191        if (n->host_hdr_len != n->guest_hdr_len) {
2192            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2193                                       out_sg, out_num,
2194                                       0, n->host_hdr_len);
2195            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2196                             out_sg, out_num,
2197                             n->guest_hdr_len, -1);
2198            out_num = sg_num;
2199            out_sg = sg;
2200        }
2201
2202        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2203                                      out_sg, out_num, virtio_net_tx_complete);
2204        if (ret == 0) {
2205            virtio_queue_set_notification(q->tx_vq, 0);
2206            q->async_tx.elem = elem;
2207            return -EBUSY;
2208        }
2209
2210drop:
2211        virtqueue_push(q->tx_vq, elem, 0);
2212        virtio_notify(vdev, q->tx_vq);
2213        g_free(elem);
2214
2215        if (++num_packets >= n->tx_burst) {
2216            break;
2217        }
2218    }
2219    return num_packets;
2220}
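
    /*
     * A sketch of the header surgery in the loop above, assuming a
     * 12-byte guest header (mergeable rx buffers) and a 10-byte host
     * header:
     *
     *     guest scatter list:  [12B hdr][payload ...]
     *     host-facing list:    iov_copy(sg, ..., 0, 10)      first 10 bytes
     *                          iov_copy(sg + n, ..., 12, -1) skip to payload
     *
     * The host sees its shorter header immediately followed by the
     * payload; the guest-only tail of the header (num_buffers) is dropped.
     * The earlier sg2 path only byte-swaps the header, lengths are
     * unchanged.
     */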
2221
2222static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2223{
2224    VirtIONet *n = VIRTIO_NET(vdev);
2225    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2226
2227    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2228        virtio_net_drop_tx_queue_data(vdev, vq);
2229        return;
2230    }
2231
2232    /* This happens when device was stopped but VCPU wasn't. */
2233    if (!vdev->vm_running) {
2234        q->tx_waiting = 1;
2235        return;
2236    }
2237
2238    if (q->tx_waiting) {
2239        virtio_queue_set_notification(vq, 1);
2240        timer_del(q->tx_timer);
2241        q->tx_waiting = 0;
2242        if (virtio_net_flush_tx(q) == -EINVAL) {
2243            return;
2244        }
2245    } else {
2246        timer_mod(q->tx_timer,
2247                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2248        q->tx_waiting = 1;
2249        virtio_queue_set_notification(vq, 0);
2250    }
2251}
2252
2253static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2254{
2255    VirtIONet *n = VIRTIO_NET(vdev);
2256    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2257
2258    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2259        virtio_net_drop_tx_queue_data(vdev, vq);
2260        return;
2261    }
2262
2263    if (unlikely(q->tx_waiting)) {
2264        return;
2265    }
2266    q->tx_waiting = 1;
2267    /* This happens when device was stopped but VCPU wasn't. */
2268    if (!vdev->vm_running) {
2269        return;
2270    }
2271    virtio_queue_set_notification(vq, 0);
2272    qemu_bh_schedule(q->tx_bh);
2273}
2274
2275static void virtio_net_tx_timer(void *opaque)
2276{
2277    VirtIONetQueue *q = opaque;
2278    VirtIONet *n = q->n;
2279    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2280    /* This happens when device was stopped but BH wasn't. */
2281    if (!vdev->vm_running) {
2282        /* Make sure tx waiting is set, so we'll run when restarted. */
2283        assert(q->tx_waiting);
2284        return;
2285    }
2286
2287    q->tx_waiting = 0;
2288
2289    /* Just in case the driver is not ready any more */
2290    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2291        return;
2292    }
2293
2294    virtio_queue_set_notification(q->tx_vq, 1);
2295    virtio_net_flush_tx(q);
2296}
2297
2298static void virtio_net_tx_bh(void *opaque)
2299{
2300    VirtIONetQueue *q = opaque;
2301    VirtIONet *n = q->n;
2302    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2303    int32_t ret;
2304
2305    /* This happens when device was stopped but BH wasn't. */
2306    if (!vdev->vm_running) {
2307        /* Make sure tx waiting is set, so we'll run when restarted. */
2308        assert(q->tx_waiting);
2309        return;
2310    }
2311
2312    q->tx_waiting = 0;
2313
2314    /* Just in case the driver is not ready any more */
2315    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2316        return;
2317    }
2318
2319    ret = virtio_net_flush_tx(q);
2320    if (ret == -EBUSY || ret == -EINVAL) {
2321        return; /* Notification re-enable handled by tx_complete or device
2322                 * broken */
2323    }
2324
2325    /* If we flush a full burst of packets, assume there are
2326     * more coming and immediately reschedule */
2327    if (ret >= n->tx_burst) {
2328        qemu_bh_schedule(q->tx_bh);
2329        q->tx_waiting = 1;
2330        return;
2331    }
2332
2333    /* If less than a full burst, re-enable notification and flush
2334     * anything that may have come in while we weren't looking.  If
2335     * we find something, assume the guest is still active and reschedule */
2336    virtio_queue_set_notification(q->tx_vq, 1);
2337    ret = virtio_net_flush_tx(q);
2338    if (ret == -EINVAL) {
2339        return;
2340    } else if (ret > 0) {
2341        virtio_queue_set_notification(q->tx_vq, 0);
2342        qemu_bh_schedule(q->tx_bh);
2343        q->tx_waiting = 1;
2344    }
2345}
2346
2347static void virtio_net_add_queue(VirtIONet *n, int index)
2348{
2349    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2350
2351    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2352                                           virtio_net_handle_rx);
2353
2354    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2355        n->vqs[index].tx_vq =
2356            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2357                             virtio_net_handle_tx_timer);
2358        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2359                                              virtio_net_tx_timer,
2360                                              &n->vqs[index]);
2361    } else {
2362        n->vqs[index].tx_vq =
2363            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2364                             virtio_net_handle_tx_bh);
2365        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2366    }
2367
2368    n->vqs[index].tx_waiting = 0;
2369    n->vqs[index].n = n;
2370}
2371
2372static void virtio_net_del_queue(VirtIONet *n, int index)
2373{
2374    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2375    VirtIONetQueue *q = &n->vqs[index];
2376    NetClientState *nc = qemu_get_subqueue(n->nic, index);
2377
2378    qemu_purge_queued_packets(nc);
2379
2380    virtio_del_queue(vdev, index * 2);
2381    if (q->tx_timer) {
2382        timer_del(q->tx_timer);
2383        timer_free(q->tx_timer);
2384        q->tx_timer = NULL;
2385    } else {
2386        qemu_bh_delete(q->tx_bh);
2387        q->tx_bh = NULL;
2388    }
2389    q->tx_waiting = 0;
2390    virtio_del_queue(vdev, index * 2 + 1);
2391}
2392
2393static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2394{
2395    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2396    int old_num_queues = virtio_get_num_queues(vdev);
2397    int new_num_queues = new_max_queues * 2 + 1;
2398    int i;
2399
2400    assert(old_num_queues >= 3);
2401    assert(old_num_queues % 2 == 1);
2402
2403    if (old_num_queues == new_num_queues) {
2404        return;
2405    }
2406
2407    /*
2408     * We always need to remove and add ctrl vq if
2409     * old_num_queues != new_num_queues. Remove ctrl_vq first,
2410     * and then we only enter one of the following two loops.
2411     */
2412    virtio_del_queue(vdev, old_num_queues - 1);
2413
2414    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2415        /* new_num_queues < old_num_queues */
2416        virtio_net_del_queue(n, i / 2);
2417    }
2418
2419    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2420        /* new_num_queues > old_num_queues */
2421        virtio_net_add_queue(n, i / 2);
2422    }
2423
2424    /* add ctrl_vq last */
2425    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2426}
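
    /*
     * Worked example: growing from 2 to 4 queue pairs, old_num_queues is
     * 2 * 2 + 1 = 5 (rx0, tx0, rx1, tx1, ctrl) and new_num_queues is
     * 4 * 2 + 1 = 9.  The ctrl vq at index 4 is deleted, the second loop
     * adds pairs 2 and 3 (vq indexes 4..7), and ctrl is re-added at
     * index 8, preserving the invariant that pair i occupies indexes 2i
     * and 2i + 1 with ctrl last.
     */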
2427
2428static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2429{
2430    int max = multiqueue ? n->max_queues : 1;
2431
2432    n->multiqueue = multiqueue;
2433    virtio_net_change_num_queues(n, max);
2434
2435    virtio_net_set_queues(n);
2436}
2437
2438static int virtio_net_post_load_device(void *opaque, int version_id)
2439{
2440    VirtIONet *n = opaque;
2441    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2442    int i, link_down;
2443
2444    trace_virtio_net_post_load_device();
2445    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2446                               virtio_vdev_has_feature(vdev,
2447                                                       VIRTIO_F_VERSION_1));
2448
2449    /* MAC_TABLE_ENTRIES may be different from the saved image */
2450    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2451        n->mac_table.in_use = 0;
2452    }
2453
2454    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2455        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2456    }
2457
2458    /*
2459     * curr_guest_offloads will later be overwritten by the
2460     * virtio_set_features_nocheck call done from virtio_load.
2461     * Save it here so that it can be restored in the
2462     * virtio_net_post_load_virtio callback.
2463     */
2464    n->saved_guest_offloads = n->curr_guest_offloads;
2465
2466    virtio_net_set_queues(n);
2467
2468    /* Find the first multicast entry (first octet's low bit set) */
2469    for (i = 0; i < n->mac_table.in_use; i++) {
2470        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2471            break;
2472        }
2473    }
2474    n->mac_table.first_multi = i;
2475
2476    /* nc.link_down can't be migrated, so infer it from the link status
2477     * bit in n->status */
2478    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2479    for (i = 0; i < n->max_queues; i++) {
2480        qemu_get_subqueue(n->nic, i)->link_down = link_down;
2481    }
2482
2483    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2484        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2485        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2486                                  QEMU_CLOCK_VIRTUAL,
2487                                  virtio_net_announce_timer, n);
2488        if (n->announce_timer.round) {
2489            timer_mod(n->announce_timer.tm,
2490                      qemu_clock_get_ms(n->announce_timer.type));
2491        } else {
2492            qemu_announce_timer_del(&n->announce_timer, false);
2493        }
2494    }
2495
2496    return 0;
2497}
2498
2499static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2500{
2501    VirtIONet *n = VIRTIO_NET(vdev);
2502    /*
2503     * The state we actually need is now in saved_guest_offloads;
2504     * see virtio_net_post_load_device for details.
2505     * Restore it and apply the desired offloads.
2506     */
2507    n->curr_guest_offloads = n->saved_guest_offloads;
2508    if (peer_has_vnet_hdr(n)) {
2509        virtio_net_apply_guest_offloads(n);
2510    }
2511
2512    return 0;
2513}
2514
2515/* tx_waiting field of a VirtIONetQueue */
2516static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2517    .name = "virtio-net-queue-tx_waiting",
2518    .fields = (VMStateField[]) {
2519        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2520        VMSTATE_END_OF_LIST()
2521    },
2522};
2523
2524static bool max_queues_gt_1(void *opaque, int version_id)
2525{
2526    return VIRTIO_NET(opaque)->max_queues > 1;
2527}
2528
2529static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2530{
2531    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2532                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2533}
2534
2535static bool mac_table_fits(void *opaque, int version_id)
2536{
2537    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2538}
2539
2540static bool mac_table_doesnt_fit(void *opaque, int version_id)
2541{
2542    return !mac_table_fits(opaque, version_id);
2543}
2544
2545/* This temporary type is shared by all the WITH_TMP methods
2546 * although only some fields are used by each.
2547 */
2548struct VirtIONetMigTmp {
2549    VirtIONet      *parent;
2550    VirtIONetQueue *vqs_1;
2551    uint16_t        curr_queues_1;
2552    uint8_t         has_ufo;
2553    uint32_t        has_vnet_hdr;
2554};
2555
2556/* The 2nd and subsequent tx_waiting flags are loaded later than
2557 * the 1st entry in the queues and only if there's more than one
2558 * entry.  We use the tmp mechanism to compute a temporary
2559 * pointer and count, and also to validate the count.
2560 */
2561
2562static int virtio_net_tx_waiting_pre_save(void *opaque)
2563{
2564    struct VirtIONetMigTmp *tmp = opaque;
2565
2566    tmp->vqs_1 = tmp->parent->vqs + 1;
2567    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2568    if (tmp->parent->curr_queues == 0) {
2569        tmp->curr_queues_1 = 0;
2570    }
2571
2572    return 0;
2573}
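
    /*
     * Example of the off-by-one mapping above: with curr_queues == 3,
     * vqs[0].tx_waiting travels in the main device section, while this
     * tmp section saves vqs_1 = &vqs[1] with curr_queues_1 = 2, i.e.
     * exactly vqs[1] and vqs[2].  A single-queue device saves an empty
     * array here.
     */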
2574
2575static int virtio_net_tx_waiting_pre_load(void *opaque)
2576{
2577    struct VirtIONetMigTmp *tmp = opaque;
2578
2579    /* Reuse the pointer setup from save */
2580    virtio_net_tx_waiting_pre_save(opaque);
2581
2582    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2583        error_report("virtio-net: curr_queues %x > max_queues %x",
2584            tmp->parent->curr_queues, tmp->parent->max_queues);
2585
2586        return -EINVAL;
2587    }
2588
2589    return 0; /* all good */
2590}
2591
2592static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2593    .name      = "virtio-net-tx_waiting",
2594    .pre_load  = virtio_net_tx_waiting_pre_load,
2595    .pre_save  = virtio_net_tx_waiting_pre_save,
2596    .fields    = (VMStateField[]) {
2597        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2598                                     curr_queues_1,
2599                                     vmstate_virtio_net_queue_tx_waiting,
2600                                     struct VirtIONetQueue),
2601        VMSTATE_END_OF_LIST()
2602    },
2603};
2604
2605/* the 'has_ufo' flag is just tested; if the incoming stream has the
2606 * flag set we need to check that we have it
2607 */
2608static int virtio_net_ufo_post_load(void *opaque, int version_id)
2609{
2610    struct VirtIONetMigTmp *tmp = opaque;
2611
2612    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2613        error_report("virtio-net: saved image requires TUN_F_UFO support");
2614        return -EINVAL;
2615    }
2616
2617    return 0;
2618}
2619
2620static int virtio_net_ufo_pre_save(void *opaque)
2621{
2622    struct VirtIONetMigTmp *tmp = opaque;
2623
2624    tmp->has_ufo = tmp->parent->has_ufo;
2625
2626    return 0;
2627}
2628
2629static const VMStateDescription vmstate_virtio_net_has_ufo = {
2630    .name      = "virtio-net-ufo",
2631    .post_load = virtio_net_ufo_post_load,
2632    .pre_save  = virtio_net_ufo_pre_save,
2633    .fields    = (VMStateField[]) {
2634        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2635        VMSTATE_END_OF_LIST()
2636    },
2637};
2638
2639/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2640 * flag set we need to check that we have it
2641 */
2642static int virtio_net_vnet_post_load(void *opaque, int version_id)
2643{
2644    struct VirtIONetMigTmp *tmp = opaque;
2645
2646    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2647        error_report("virtio-net: saved image requires vnet_hdr=on");
2648        return -EINVAL;
2649    }
2650
2651    return 0;
2652}
2653
2654static int virtio_net_vnet_pre_save(void *opaque)
2655{
2656    struct VirtIONetMigTmp *tmp = opaque;
2657
2658    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2659
2660    return 0;
2661}
2662
2663static const VMStateDescription vmstate_virtio_net_has_vnet = {
2664    .name      = "virtio-net-vnet",
2665    .post_load = virtio_net_vnet_post_load,
2666    .pre_save  = virtio_net_vnet_pre_save,
2667    .fields    = (VMStateField[]) {
2668        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2669        VMSTATE_END_OF_LIST()
2670    },
2671};
2672
2673static const VMStateDescription vmstate_virtio_net_device = {
2674    .name = "virtio-net-device",
2675    .version_id = VIRTIO_NET_VM_VERSION,
2676    .minimum_version_id = VIRTIO_NET_VM_VERSION,
2677    .post_load = virtio_net_post_load_device,
2678    .fields = (VMStateField[]) {
2679        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
2680        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
2681                               vmstate_virtio_net_queue_tx_waiting,
2682                               VirtIONetQueue),
2683        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
2684        VMSTATE_UINT16(status, VirtIONet),
2685        VMSTATE_UINT8(promisc, VirtIONet),
2686        VMSTATE_UINT8(allmulti, VirtIONet),
2687        VMSTATE_UINT32(mac_table.in_use, VirtIONet),
2688
2689        /* Guarded pair: If it fits we load it, else we throw it away
2690         * - this can happen if the source has a larger MAC table; post-load
2691         *   sets flags in this case.
2692         */
2693        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
2694                                0, mac_table_fits, mac_table.in_use,
2695                                 ETH_ALEN),
2696        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
2697                                     mac_table.in_use, ETH_ALEN),
2698
2699        /* Note: This is an array of uint32s that's always been saved as a
2700         * buffer; mind the endianness; it's actually used as a bitmap
2701         * built on those uints.
2702         */
2703        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
2704        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2705                         vmstate_virtio_net_has_vnet),
2706        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
2707        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
2708        VMSTATE_UINT8(alluni, VirtIONet),
2709        VMSTATE_UINT8(nomulti, VirtIONet),
2710        VMSTATE_UINT8(nouni, VirtIONet),
2711        VMSTATE_UINT8(nobcast, VirtIONet),
2712        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2713                         vmstate_virtio_net_has_ufo),
2714        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
2715                            vmstate_info_uint16_equal, uint16_t),
2716        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
2717        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2718                         vmstate_virtio_net_tx_waiting),
2719        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
2720                            has_ctrl_guest_offloads),
2721        VMSTATE_END_OF_LIST()
2722    },
2723};
2724
2725static NetClientInfo net_virtio_info = {
2726    .type = NET_CLIENT_DRIVER_NIC,
2727    .size = sizeof(NICState),
2728    .can_receive = virtio_net_can_receive,
2729    .receive = virtio_net_receive,
2730    .link_status_changed = virtio_net_set_link_status,
2731    .query_rx_filter = virtio_net_query_rxfilter,
2732    .announce = virtio_net_announce,
2733};
2734
2735static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
2736{
2737    VirtIONet *n = VIRTIO_NET(vdev);
2738    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
2739    assert(n->vhost_started);
2740    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
2741}
2742
2743static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
2744                                           bool mask)
2745{
2746    VirtIONet *n = VIRTIO_NET(vdev);
2747    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
2748    assert(n->vhost_started);
2749    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
2750                             vdev, idx, mask);
2751}
2752
2753static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
2754{
2755    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
2756
2757    n->config_size = virtio_feature_get_config_size(feature_sizes,
2758                                                    host_features);
2759}
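
    /*
     * What this computes (a sketch): feature_sizes maps each
     * config-affecting feature to the end offset of the field it needs,
     * and virtio_feature_get_config_size() returns the largest such
     * offset among the enabled features.  VIRTIO_NET_F_MAC is forced on
     * here so the config space always covers at least the mac field;
     * enabling e.g. VIRTIO_NET_F_STATUS grows it to cover status as well.
     */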
2760
2761void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
2762                                   const char *type)
2763{
2764    /*
2765     * If name is NULL, the netclient name will be of the form type.x.
2766     */
2767    assert(type != NULL);
2768
2769    g_free(n->netclient_name);
2770    g_free(n->netclient_type);
2771    n->netclient_name = g_strdup(name);
2772    n->netclient_type = g_strdup(type);
2773}
2774
2775static bool failover_unplug_primary(VirtIONet *n)
2776{
2777    HotplugHandler *hotplug_ctrl;
2778    PCIDevice *pci_dev;
2779    Error *err = NULL;
2780
2781    hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
2782    if (hotplug_ctrl) {
2783        pci_dev = PCI_DEVICE(n->primary_dev);
2784        pci_dev->partially_hotplugged = true;
2785        hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
2786        if (err) {
2787            error_report_err(err);
2788            return false;
2789        }
2790    } else {
2791        return false;
2792    }
2793    return true;
2794}
2795
2796static bool failover_replug_primary(VirtIONet *n, Error **errp)
2797{
2798    Error *err = NULL;
2799    HotplugHandler *hotplug_ctrl;
2800    PCIDevice *pdev = PCI_DEVICE(n->primary_dev);
2801
2802    if (!pdev->partially_hotplugged) {
2803        return true;
2804    }
2805    if (!n->primary_device_opts) {
2806        n->primary_device_opts = qemu_opts_from_qdict(
2807                qemu_find_opts("device"),
2808                n->primary_device_dict, errp);
2809        if (!n->primary_device_opts) {
2810            return false;
2811        }
2812    }
2813    n->primary_bus = n->primary_dev->parent_bus;
2814    if (!n->primary_bus) {
2815        error_setg(errp, "virtio_net: couldn't find primary bus");
2816        return false;
2817    }
2818    qdev_set_parent_bus(n->primary_dev, n->primary_bus);
2819    n->primary_should_be_hidden = false;
2820    qemu_opt_set_bool(n->primary_device_opts,
2821                      "partially_hotplugged", true, &err);
2822    if (err) {
2823        goto out;
2824    }
2825    hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
2826    if (hotplug_ctrl) {
2827        hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err);
2828        if (err) {
2829            goto out;
2830        }
2831        hotplug_handler_plug(hotplug_ctrl, n->primary_dev, errp);
2832    }
2833
2834out:
2835    error_propagate(errp, err);
2836    return !err;
2837}
2838
2839static void virtio_net_handle_migration_primary(VirtIONet *n,
2840                                                MigrationState *s)
2841{
2842    bool should_be_hidden;
2843    Error *err = NULL;
2844
2845    should_be_hidden = atomic_read(&n->primary_should_be_hidden);
2846
2847    if (!n->primary_dev) {
2848        n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
2849        if (!n->primary_dev) {
2850            return;
2851        }
2852    }
2853
2854    if (migration_in_setup(s) && !should_be_hidden) {
2855        if (failover_unplug_primary(n)) {
2856            vmstate_unregister(n->primary_dev, qdev_get_vmsd(n->primary_dev),
2857                    n->primary_dev);
2858            qapi_event_send_unplug_primary(n->primary_device_id);
2859            atomic_set(&n->primary_should_be_hidden, true);
2860        } else {
2861            warn_report("couldn't unplug primary device");
2862        }
2863    } else if (migration_has_failed(s)) {
2864        /* We already unplugged the device; let's plug it back in */
2865        if (!failover_replug_primary(n, &err)) {
2866            if (err) {
2867                error_report_err(err);
2868            }
2869        }
2870    }
2871}
2872
2873static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
2874{
2875    MigrationState *s = data;
2876    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
2877    virtio_net_handle_migration_primary(n, s);
2878}
2879
2880static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
2881                                               QemuOpts *device_opts)
2882{
2883    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
2884    bool match_found = false;
2885    bool hide = false;
2886
2887    if (!device_opts) {
2888        return -1;
2889    }
2890    n->primary_device_dict = qemu_opts_to_qdict(device_opts,
2891            n->primary_device_dict);
2892    if (n->primary_device_dict) {
2893        g_free(n->standby_id);
2894        n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
2895                    "failover_pair_id"));
2896    }
2897    if (g_strcmp0(n->standby_id, n->netclient_name) == 0) {
2898        match_found = true;
2899    } else {
2900        match_found = false;
2901        hide = false;
2902        g_free(n->standby_id);
2903        n->primary_device_dict = NULL;
2904        goto out;
2905    }
2906
2907    n->primary_device_opts = device_opts;
2908
2909    /* primary_should_be_hidden is set during feature negotiation */
2910    hide = atomic_read(&n->primary_should_be_hidden);
2911
2912    if (n->primary_device_dict) {
2913        g_free(n->primary_device_id);
2914        n->primary_device_id = g_strdup(qdict_get_try_str(
2915                    n->primary_device_dict, "id"));
2916        if (!n->primary_device_id) {
2917            warn_report("primary_device_id not set");
2918        }
2919    }
2920
2921out:
2922    if (match_found && hide) {
2923        return 1;
2924    } else if (match_found && !hide) {
2925        return 0;
2926    } else {
2927        return -1;
2928    }
2929}
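
    /*
     * Return value contract, summarizing the branches above: 1 means "this
     * is our failover primary and it must stay hidden for now", 0 means
     * "ours, plug it normally", and -1 means "not related to this
     * virtio-net device, no opinion".
     */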
2930
2931static void virtio_net_device_realize(DeviceState *dev, Error **errp)
2932{
2933    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2934    VirtIONet *n = VIRTIO_NET(dev);
2935    NetClientState *nc;
2936    int i;
2937
2938    if (n->net_conf.mtu) {
2939        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
2940    }
2941
2942    if (n->net_conf.duplex_str) {
2943        if (strcmp(n->net_conf.duplex_str, "half") == 0) {
2944            n->net_conf.duplex = DUPLEX_HALF;
2945        } else if (strcmp(n->net_conf.duplex_str, "full") == 0) {
2946            n->net_conf.duplex = DUPLEX_FULL;
2947        } else {
2948            error_setg(errp, "'duplex' must be 'half' or 'full'");
                return;
2949        }
2950        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2951    } else {
2952        n->net_conf.duplex = DUPLEX_UNKNOWN;
2953    }
2954
2955    if (n->net_conf.speed < SPEED_UNKNOWN) {
2956        error_setg(errp, "'speed' must be between 0 and INT_MAX");
            return;
2957    } else if (n->net_conf.speed >= 0) {
2958        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2959    }
2960
2961    if (n->failover) {
2962        n->primary_listener.should_be_hidden =
2963            virtio_net_primary_should_be_hidden;
2964        atomic_set(&n->primary_should_be_hidden, true);
2965        device_listener_register(&n->primary_listener);
2966        n->migration_state.notify = virtio_net_migration_state_notifier;
2967        add_migration_state_change_notifier(&n->migration_state);
2968        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
2969    }
2970
2971    virtio_net_set_config_size(n, n->host_features);
2972    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
2973
2974    /*
2975     * We set a lower limit on RX queue size to what it always was.
2976     * Guests that want a smaller ring can always resize it without
2977     * help from us (using virtio 1 and up).
2978     */
2979    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
2980        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
2981        !is_power_of_2(n->net_conf.rx_queue_size)) {
2982        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
2983                   "must be a power of 2 between %d and %d.",
2984                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
2985                   VIRTQUEUE_MAX_SIZE);
2986        virtio_cleanup(vdev);
2987        return;
2988    }
2989
2990    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
2991        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
2992        !is_power_of_2(n->net_conf.tx_queue_size)) {
2993        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
2994                   "must be a power of 2 between %d and %d",
2995                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
2996                   VIRTQUEUE_MAX_SIZE);
2997        virtio_cleanup(vdev);
2998        return;
2999    }
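
        /*
         * Concretely (assuming VIRTQUEUE_MAX_SIZE == 1024): the two checks
         * above accept rx_queue_size and tx_queue_size of 256, 512 or 1024
         * only: powers of two between the legacy default and the virtqueue
         * maximum.
         */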
3000
3001    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
3002    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
3003        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
3004                   "must be a positive integer less than %d.",
3005                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
3006        virtio_cleanup(vdev);
3007        return;
3008    }
3009    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
3010    n->curr_queues = 1;
3011    n->tx_timeout = n->net_conf.txtimer;
3012
3013    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3014                       && strcmp(n->net_conf.tx, "bh")) {
3015        warn_report("virtio-net: "
3016                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3017                    n->net_conf.tx);
3018        error_printf("Defaulting to \"bh\"");
3019    }
3020
3021    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3022                                    n->net_conf.tx_queue_size);
3023
3024    for (i = 0; i < n->max_queues; i++) {
3025        virtio_net_add_queue(n, i);
3026    }
3027
3028    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3029    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3030    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3031    n->status = VIRTIO_NET_S_LINK_UP;
3032    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3033                              QEMU_CLOCK_VIRTUAL,
3034                              virtio_net_announce_timer, n);
3035    n->announce_timer.round = 0;
3036
3037    if (n->netclient_type) {
3038        /*
3039         * This happens when virtio_net_set_netclient_name has been called.
3040         */
3041        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3042                              n->netclient_type, n->netclient_name, n);
3043    } else {
3044        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3045                              object_get_typename(OBJECT(dev)), dev->id, n);
3046    }
3047
3048    peer_test_vnet_hdr(n);
3049    if (peer_has_vnet_hdr(n)) {
3050        for (i = 0; i < n->max_queues; i++) {
3051            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3052        }
3053        n->host_hdr_len = sizeof(struct virtio_net_hdr);
3054    } else {
3055        n->host_hdr_len = 0;
3056    }
3057
3058    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3059
3060    n->vqs[0].tx_waiting = 0;
3061    n->tx_burst = n->net_conf.txburst;
3062    virtio_net_set_mrg_rx_bufs(n, 0, 0);
3063    n->promisc = 1; /* for compatibility */
3064
3065    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3066
3067    n->vlans = g_malloc0(MAX_VLAN >> 3);
3068
3069    nc = qemu_get_queue(n->nic);
3070    nc->rxfilter_notify_enabled = 1;
3071
3072    QTAILQ_INIT(&n->rsc_chains);
3073    n->qdev = dev;
3074}
3075
3076static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
3077{
3078    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3079    VirtIONet *n = VIRTIO_NET(dev);
3080    int i, max_queues;
3081
3082    /* This will stop vhost backend if appropriate. */
3083    virtio_net_set_status(vdev, 0);
3084
3085    g_free(n->netclient_name);
3086    n->netclient_name = NULL;
3087    g_free(n->netclient_type);
3088    n->netclient_type = NULL;
3089
3090    g_free(n->mac_table.macs);
3091    g_free(n->vlans);
3092
3093    if (n->failover) {
3094        g_free(n->primary_device_id);
3095        g_free(n->standby_id);
3096        qobject_unref(n->primary_device_dict);
3097        n->primary_device_dict = NULL;
3098    }
3099
3100    max_queues = n->multiqueue ? n->max_queues : 1;
3101    for (i = 0; i < max_queues; i++) {
3102        virtio_net_del_queue(n, i);
3103    }
3104
3105    qemu_announce_timer_del(&n->announce_timer, false);
3106    g_free(n->vqs);
3107    qemu_del_nic(n->nic);
3108    virtio_net_rsc_cleanup(n);
3109    virtio_cleanup(vdev);
3110}
3111
3112static void virtio_net_instance_init(Object *obj)
3113{
3114    VirtIONet *n = VIRTIO_NET(obj);
3115
3116    /*
3117     * The default config_size is sizeof(struct virtio_net_config).
3118     * Can be overridden with virtio_net_set_config_size.
3119     */
3120    n->config_size = sizeof(struct virtio_net_config);
3121    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3122                                  "bootindex", "/ethernet-phy@0",
3123                                  DEVICE(n), NULL);
3124}
3125
3126static int virtio_net_pre_save(void *opaque)
3127{
3128    VirtIONet *n = opaque;
3129
3130    /* At this point, the backend must be stopped, otherwise
3131     * it might keep writing to memory. */
3132    assert(!n->vhost_started);
3133
3134    return 0;
3135}
3136
3137static bool primary_unplug_pending(void *opaque)
3138{
3139    DeviceState *dev = opaque;
3140    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3141    VirtIONet *n = VIRTIO_NET(vdev);
3142
3143    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3144        return false;
3145    }
3146    return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
3147}
3148
3149static bool dev_unplug_pending(void *opaque)
3150{
3151    DeviceState *dev = opaque;
3152    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3153
3154    return vdc->primary_unplug_pending(dev);
3155}
3156
3157static const VMStateDescription vmstate_virtio_net = {
3158    .name = "virtio-net",
3159    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3160    .version_id = VIRTIO_NET_VM_VERSION,
3161    .fields = (VMStateField[]) {
3162        VMSTATE_VIRTIO_DEVICE,
3163        VMSTATE_END_OF_LIST()
3164    },
3165    .pre_save = virtio_net_pre_save,
3166    .dev_unplug_pending = dev_unplug_pending,
3167};
3168
3169static Property virtio_net_properties[] = {
3170    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3171                    VIRTIO_NET_F_CSUM, true),
3172    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3173                    VIRTIO_NET_F_GUEST_CSUM, true),
3174    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3175    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3176                    VIRTIO_NET_F_GUEST_TSO4, true),
3177    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3178                    VIRTIO_NET_F_GUEST_TSO6, true),
3179    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3180                    VIRTIO_NET_F_GUEST_ECN, true),
3181    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3182                    VIRTIO_NET_F_GUEST_UFO, true),
3183    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3184                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3185    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3186                    VIRTIO_NET_F_HOST_TSO4, true),
3187    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3188                    VIRTIO_NET_F_HOST_TSO6, true),
3189    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3190                    VIRTIO_NET_F_HOST_ECN, true),
3191    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3192                    VIRTIO_NET_F_HOST_UFO, true),
3193    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3194                    VIRTIO_NET_F_MRG_RXBUF, true),
3195    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3196                    VIRTIO_NET_F_STATUS, true),
3197    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3198                    VIRTIO_NET_F_CTRL_VQ, true),
3199    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3200                    VIRTIO_NET_F_CTRL_RX, true),
3201    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3202                    VIRTIO_NET_F_CTRL_VLAN, true),
3203    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3204                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3205    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3206                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3207    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3208                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3209    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3210    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3211                    VIRTIO_NET_F_RSC_EXT, false),
3212    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3213                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3214    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3215    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3216                       TX_TIMER_INTERVAL),
3217    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3218    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3219    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3220                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3221    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3222                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3223    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3224    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3225                     true),
3226    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3227    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3228    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3229    DEFINE_PROP_END_OF_LIST(),
3230};
3231
3232static void virtio_net_class_init(ObjectClass *klass, void *data)
3233{
3234    DeviceClass *dc = DEVICE_CLASS(klass);
3235    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3236
3237    dc->props = virtio_net_properties;
3238    dc->vmsd = &vmstate_virtio_net;
3239    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3240    vdc->realize = virtio_net_device_realize;
3241    vdc->unrealize = virtio_net_device_unrealize;
3242    vdc->get_config = virtio_net_get_config;
3243    vdc->set_config = virtio_net_set_config;
3244    vdc->get_features = virtio_net_get_features;
3245    vdc->set_features = virtio_net_set_features;
3246    vdc->bad_features = virtio_net_bad_features;
3247    vdc->reset = virtio_net_reset;
3248    vdc->set_status = virtio_net_set_status;
3249    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3250    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3251    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3252    vdc->post_load = virtio_net_post_load_virtio;
3253    vdc->vmsd = &vmstate_virtio_net_device;
3254    vdc->primary_unplug_pending = primary_unplug_pending;
3255}
3256
3257static const TypeInfo virtio_net_info = {
3258    .name = TYPE_VIRTIO_NET,
3259    .parent = TYPE_VIRTIO_DEVICE,
3260    .instance_size = sizeof(VirtIONet),
3261    .instance_init = virtio_net_instance_init,
3262    .class_init = virtio_net_class_init,
3263};
3264
3265static void virtio_register_types(void)
3266{
3267    type_register_static(&virtio_net_info);
3268}
3269
3270type_init(virtio_register_types)
3271