qemu/hw/net/virtio-net.c
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
/* Purge coalesced packets timer interval. This value affects performance
   significantly and should be tuned carefully: '300000' (300us) is the
   recommended value for passing the WHQL test, while '50000' can gain 2x
   netperf throughput with tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};
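
/*
 * The guest-visible config size is derived from this table and the
 * negotiated feature bits (see virtio_net_set_config_size() later in
 * this file): config space ends at the last field whose feature was
 * offered.
 */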

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

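/*
 * Virtqueues are laid out in RX/TX pairs: queue pair i uses virtqueue
 * 2*i for RX and 2*i+1 for TX, so the pair index is the vq index / 2.
 */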
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

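/*
 * Build the guest-visible config space. Multi-byte fields are stored via
 * virtio_stw_p()/virtio_stl_p() so they match the endianness negotiated
 * with the guest; for vhost-vdpa peers the backend's own view of the
 * config space overrides ours.
 */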
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            memcpy(config, &netcfg, n->config_size);
        }
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_MASTER);
    }
}

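/* The device is operational only if the driver is ready (DRIVER_OK),
 * the link is up and the VM is running. */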
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

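/*
 * Start or stop the vhost backend so that it matches the state implied by
 * the device status and peer link state. Pending packets are purged before
 * handover so that userspace never touches rings that vhost owns.
 */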
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

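/*
 * Propagate the vnet header endianness to all peers. On failure while
 * enabling, roll back the queues already configured; returning true tells
 * the caller that QEMU itself must byteswap the headers.
 */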
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

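/*
 * Recompute per-queue state on every status change: flush queues that just
 * became active, and re-arm or cancel the TX timer/bottom half for queues
 * that still have transmissions outstanding.
 */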
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the tx
                 * queue with notification disabled; drop them. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

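/*
 * Collect the enabled VLAN ids from the bitmap: n->vlans packs 32 ids per
 * word, so id (i << 5) + j is enabled when bit j of word i is set.
 */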
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

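/*
 * Select the vnet header the guest will see: virtio 1.0 always uses the
 * mergeable-buffers layout (extended with a hash field when hash reports
 * are negotiated), while legacy guests get it only if MRG_RXBUF was
 * negotiated. Peers that can handle the chosen length adopt it as the
 * host header length too.
 */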
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

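/*
 * Compute the feature set we offer the guest: everything the device
 * supports, minus offloads the peer cannot provide, further filtered by
 * the vhost backend when one is attached.
 */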
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;

    if (n->primary_dev) {
        return;
    }

    n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
            n->primary_device_id);
    if (n->primary_device_opts) {
        n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
        if (err) {
            qemu_opts_del(n->primary_device_opts);
        }
        if (n->primary_dev) {
            n->primary_bus = n->primary_dev->parent_bus;
            if (err) {
                qdev_unplug(n->primary_dev, &err);
                qdev_set_id(n->primary_dev, "");
            }
        }
    } else {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=<virtio-net-id>\n");
    }
    error_propagate(errp, err);
}

static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
{
    VirtIONet *n = opaque;
    int ret = 0;

    const char *standby_id = qemu_opt_get(opts, "failover_pair_id");

    if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
        n->primary_device_id = g_strdup(opts->id);
        ret = 1;
    }

    return ret;
}

static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
{
    DeviceState *dev = NULL;
    Error *err = NULL;

    if (qemu_opts_foreach(qemu_find_opts("device"),
                          is_my_primary, n, &err)) {
        if (err) {
            error_propagate(errp, err);
            return NULL;
        }
        if (n->primary_device_id) {
            dev = qdev_find_recursive(sysbus_get_default(),
                                      n->primary_device_id);
        } else {
            error_setg(errp, "Primary device id not found");
            return NULL;
        }
    }
    return dev;
}

static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
                                                    DeviceState *dev,
                                                    Error **errp)
{
    DeviceState *prim_dev = NULL;
    Error *err = NULL;

    prim_dev = virtio_net_find_primary(n, &err);
    if (prim_dev) {
        n->primary_device_id = g_strdup(prim_dev->id);
        n->primary_device_opts = prim_dev->opts;
    } else {
        error_propagate(errp, err);
    }

    return prim_dev;
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->primary_should_be_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
            if (err) {
                goto out_err;
            }
            failover_add_primary(n, &err);
            if (err) {
                goto out_err;
            }
        }
    }
    return;

out_err:
    if (err) {
        warn_report_err(err);
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

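/*
 * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two tables, unicast first and then
 * multicast; each is a 32-bit entry count followed by count * ETH_ALEN
 * bytes of addresses. Tables that do not fit degrade to all-pass via the
 * uni/multi overflow flags.
 */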
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;
}

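/*
 * Parse a virtio_net_rss_config blob: the fixed header up to the
 * indirection table, then the table itself, then max_tx_vq and the hash
 * key length, then the key. Returns the number of queues to use, or 0 on
 * error.
 */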
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queues, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queues) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queues";
        err_value = (uint32_t)s;
        goto error;
    }
    queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
    if (queues == 0 || queues > n->max_queues) {
        err_msg = "Invalid number of queues";
        err_value = queues;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queues;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;
    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queues;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queues;

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg,
                              sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queues) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

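/* Byteswap the multi-byte vnet header fields when the backend could not be
 * set to the guest's endianness (see needs_vnet_hdr_swap). */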
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

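/*
 * Apply the RX filter; returns nonzero when the packet should be
 * delivered. Order matters: promiscuous mode wins, then the VLAN filter,
 * then the multicast/unicast policy flags and the MAC table.
 */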
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static uint8_t virtio_net_get_hash_type(bool isip4,
                                        bool isip6,
                                        bool isudp,
                                        bool istcp,
                                        uint32_t types)
{
    if (isip4) {
        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
            return NetPktRssIpV4Tcp;
        }
        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
            return NetPktRssIpV4Udp;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (isip6) {
        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;

        if (istcp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
        if (isudp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
        if (types & mask) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
                NetPktRssIpV6Ex : NetPktRssIpV6;
        }
    }
    return 0xff;
}

static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}

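/*
 * Hash the packet and, if redirection is enabled, look up the target RX
 * queue in the indirection table. Returns the queue index the packet must
 * move to, or -1 if it can stay on the current queue.
 */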
1609static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1610                                  size_t size)
1611{
1612    VirtIONet *n = qemu_get_nic_opaque(nc);
1613    unsigned int index = nc->queue_index, new_index = index;
1614    struct NetRxPkt *pkt = n->rx_pkt;
1615    uint8_t net_hash_type;
1616    uint32_t hash;
1617    bool isip4, isip6, isudp, istcp;
1618    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1619        VIRTIO_NET_HASH_REPORT_IPv4,
1620        VIRTIO_NET_HASH_REPORT_TCPv4,
1621        VIRTIO_NET_HASH_REPORT_TCPv6,
1622        VIRTIO_NET_HASH_REPORT_IPv6,
1623        VIRTIO_NET_HASH_REPORT_IPv6_EX,
1624        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1625        VIRTIO_NET_HASH_REPORT_UDPv4,
1626        VIRTIO_NET_HASH_REPORT_UDPv6,
1627        VIRTIO_NET_HASH_REPORT_UDPv6_EX
1628    };
1629
1630    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1631                             size - n->host_hdr_len);
1632    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1633    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1634        istcp = isudp = false;
1635    }
1636    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1637        istcp = isudp = false;
1638    }
1639    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1640                                             n->rss_data.hash_types);
1641    if (net_hash_type > NetPktRssIpV6UdpEx) {
1642        if (n->rss_data.populate_hash) {
1643            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1644        }
1645        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1646    }
1647
1648    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1649
1650    if (n->rss_data.populate_hash) {
1651        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1652    }
1653
1654    if (n->rss_data.redirect) {
1655        new_index = hash & (n->rss_data.indirections_len - 1);
1656        new_index = n->rss_data.indirections_table[new_index];
1657    }
1658
1659    return (index == new_index) ? -1 : new_index;
1660}
1661
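    /*
     * Copy one incoming packet into the guest's receive virtqueue,
     * prepending the virtio-net header and spreading the data over as
     * many buffers as needed when mergeable rx buffers are in use.
     * Runs under the RCU read lock; 'no_rss' prevents recursing once
     * the packet has already been steered by RSS.
     */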
1662static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1663                                      size_t size, bool no_rss)
1664{
1665    VirtIONet *n = qemu_get_nic_opaque(nc);
1666    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1667    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1668    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1669    struct virtio_net_hdr_mrg_rxbuf mhdr;
1670    unsigned mhdr_cnt = 0;
1671    size_t offset, i, guest_offset;
1672
1673    if (!virtio_net_can_receive(nc)) {
1674        return -1;
1675    }
1676
1677    if (!no_rss && n->rss_data.enabled) {
1678        int index = virtio_net_process_rss(nc, buf, size);
1679        if (index >= 0) {
1680            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1681            return virtio_net_receive_rcu(nc2, buf, size, true);
1682        }
1683    }
1684
1685    /* hdr_len refers to the header we supply to the guest */
1686    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1687        return 0;
1688    }
1689
1690    if (!receive_filter(n, buf, size)) {
1691        return size;
    }
1692
1693    offset = i = 0;
1694
1695    while (offset < size) {
1696        VirtQueueElement *elem;
1697        int len, total;
1698        const struct iovec *sg;
1699
1700        total = 0;
1701
1702        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1703        if (!elem) {
1704            if (i) {
1705                virtio_error(vdev, "virtio-net unexpected empty queue: "
1706                             "i %zd mergeable %d offset %zd, size %zd, "
1707                             "guest hdr len %zd, host hdr len %zd "
1708                             "guest features 0x%" PRIx64,
1709                             i, n->mergeable_rx_bufs, offset, size,
1710                             n->guest_hdr_len, n->host_hdr_len,
1711                             vdev->guest_features);
1712            }
1713            return -1;
1714        }
1715
1716        if (elem->in_num < 1) {
1717            virtio_error(vdev,
1718                         "virtio-net receive queue contains no in buffers");
1719            virtqueue_detach_element(q->rx_vq, elem, 0);
1720            g_free(elem);
1721            return -1;
1722        }
1723
1724        sg = elem->in_sg;
1725        if (i == 0) {
1726            assert(offset == 0);
1727            if (n->mergeable_rx_bufs) {
1728                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1729                                    sg, elem->in_num,
1730                                    offsetof(typeof(mhdr), num_buffers),
1731                                    sizeof(mhdr.num_buffers));
1732            }
1733
1734            receive_header(n, sg, elem->in_num, buf, size);
1735            if (n->rss_data.populate_hash) {
1736                offset = sizeof(mhdr);
1737                iov_from_buf(sg, elem->in_num, offset,
1738                             buf + offset, n->host_hdr_len - sizeof(mhdr));
1739            }
1740            offset = n->host_hdr_len;
1741            total += n->guest_hdr_len;
1742            guest_offset = n->guest_hdr_len;
1743        } else {
1744            guest_offset = 0;
1745        }
1746
1747        /* copy in packet.  ugh */
1748        len = iov_from_buf(sg, elem->in_num, guest_offset,
1749                           buf + offset, size - offset);
1750        total += len;
1751        offset += len;
1752        /* If buffers can't be merged, at this point we
1753         * must have consumed the complete packet.
1754         * Otherwise, drop it. */
1755        if (!n->mergeable_rx_bufs && offset < size) {
1756            virtqueue_unpop(q->rx_vq, elem, total);
1757            g_free(elem);
1758            return size;
1759        }
1760
1761        /* signal other side */
1762        virtqueue_fill(q->rx_vq, elem, total, i++);
1763        g_free(elem);
1764    }
1765
1766    if (mhdr_cnt) {
1767        virtio_stw_p(vdev, &mhdr.num_buffers, i);
1768        iov_from_buf(mhdr_sg, mhdr_cnt,
1769                     0,
1770                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
1771    }
1772
1773    virtqueue_flush(q->rx_vq, i);
1774    virtio_notify(vdev, q->rx_vq);
1775
1776    return size;
1777}
1778
1779static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1780                                  size_t size)
1781{
1782    RCU_READ_LOCK_GUARD();
1783
1784    return virtio_net_receive_rcu(nc, buf, size, false);
1785}
1786
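    /*
     * Locate the IPv4 and TCP headers in 'buf' and record their
     * positions and lengths in 'unit' for the coalescing checks.
     */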
1787static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1788                                         const uint8_t *buf,
1789                                         VirtioNetRscUnit *unit)
1790{
1791    uint16_t ip_hdrlen;
1792    struct ip_header *ip;
1793
1794    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1795                              + sizeof(struct eth_header));
1796    unit->ip = (void *)ip;
1797    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1798    unit->ip_plen = &ip->ip_len;
1799    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1800    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1801    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1802}
1803
1804static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1805                                         const uint8_t *buf,
1806                                         VirtioNetRscUnit *unit)
1807{
1808    struct ip6_header *ip6;
1809
1810    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1811                                 + sizeof(struct eth_header));
1812    unit->ip = ip6;
1813    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1814    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1815                                        + sizeof(struct ip6_header));
1816    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1817
1818    /* Unlike IPv4, the IPv6 payload length field does not include
1819       the IP header itself */
1820    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1821}
1822
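    /*
     * Deliver one cached segment to the guest, filling in the RSC
     * fields of the virtio header if it was coalesced, then unlink
     * and free it.  A return value of 0 means the send failed.
     */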
1823static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1824                                       VirtioNetRscSeg *seg)
1825{
1826    int ret;
1827    struct virtio_net_hdr_v1 *h;
1828
1829    h = (struct virtio_net_hdr_v1 *)seg->buf;
1830    h->flags = 0;
1831    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1832
1833    if (seg->is_coalesced) {
1834        h->rsc.segments = seg->packets;
1835        h->rsc.dup_acks = seg->dup_ack;
1836        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1837        if (chain->proto == ETH_P_IP) {
1838            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1839        } else {
1840            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1841        }
1842    }
1843
1844    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1845    QTAILQ_REMOVE(&chain->buffers, seg, next);
1846    g_free(seg->buf);
1847    g_free(seg);
1848
1849    return ret;
1850}
1851
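    /*
     * Drain timer callback: flush every segment cached on the chain
     * to the guest, re-arming the timer if any buffers remain queued.
     */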
1852static void virtio_net_rsc_purge(void *opq)
1853{
1854    VirtioNetRscSeg *seg, *rn;
1855    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1856
1857    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1858        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1859            chain->stat.purge_failed++;
1860            continue;
1861        }
1862    }
1863
1864    chain->stat.timer++;
1865    if (!QTAILQ_EMPTY(&chain->buffers)) {
1866        timer_mod(chain->drain_timer,
1867              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1868    }
1869}
1870
1871static void virtio_net_rsc_cleanup(VirtIONet *n)
1872{
1873    VirtioNetRscChain *chain, *rn_chain;
1874    VirtioNetRscSeg *seg, *rn_seg;
1875
1876    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1877        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1878            QTAILQ_REMOVE(&chain->buffers, seg, next);
1879            g_free(seg->buf);
1880            g_free(seg);
1881        }
1882
1883        timer_del(chain->drain_timer);
1884        timer_free(chain->drain_timer);
1885        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1886        g_free(chain);
1887    }
1888}
1889
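    /*
     * Copy the packet into a freshly allocated segment and queue it on
     * the chain as a new coalescing candidate.  The buffer is sized
     * for the largest segment the chain is allowed to grow to.
     */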
1890static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1891                                     NetClientState *nc,
1892                                     const uint8_t *buf, size_t size)
1893{
1894    uint16_t hdr_len;
1895    VirtioNetRscSeg *seg;
1896
1897    hdr_len = chain->n->guest_hdr_len;
1898    seg = g_malloc(sizeof(VirtioNetRscSeg));
1899    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1900        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1901    memcpy(seg->buf, buf, size);
1902    seg->size = size;
1903    seg->packets = 1;
1904    seg->dup_ack = 0;
1905    seg->is_coalesced = 0;
1906    seg->nc = nc;
1907
1908    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1909    chain->stat.cache++;
1910
1911    switch (chain->proto) {
1912    case ETH_P_IP:
1913        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1914        break;
1915    case ETH_P_IPV6:
1916        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1917        break;
1918    default:
1919        g_assert_not_reached();
1920    }
1921}
1922
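    /*
     * Decide the fate of a packet whose TCP sequence matches the
     * cached segment: out-of-window, pure and duplicated acks finalize
     * the segment, while a bare window update is merged into the
     * cached header.
     */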
1923static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1924                                         VirtioNetRscSeg *seg,
1925                                         const uint8_t *buf,
1926                                         struct tcp_header *n_tcp,
1927                                         struct tcp_header *o_tcp)
1928{
1929    uint32_t nack, oack;
1930    uint16_t nwin, owin;
1931
1932    nack = htonl(n_tcp->th_ack);
1933    nwin = htons(n_tcp->th_win);
1934    oack = htonl(o_tcp->th_ack);
1935    owin = htons(o_tcp->th_win);
1936
1937    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1938        chain->stat.ack_out_of_win++;
1939        return RSC_FINAL;
1940    } else if (nack == oack) {
1941        /* duplicated ack or window probe */
1942        if (nwin == owin) {
1943            /* duplicated ack: count it for the WHQL test (up to 1) */
1944            chain->stat.dup_ack++;
1945            return RSC_FINAL;
1946        } else {
1947            /* Coalesce window update */
1948            o_tcp->th_win = n_tcp->th_win;
1949            chain->stat.win_update++;
1950            return RSC_COALESCE;
1951        }
1952    } else {
1953        /* pure ack: go to state 'C', finalize */
1954        chain->stat.pure_ack++;
1955        return RSC_FINAL;
1956    }
1957}
1958
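    /*
     * Try to append the new packet's payload to a cached segment: the
     * TCP sequence must follow on exactly from the cached data and the
     * merged payload must not exceed the chain's maximum, otherwise
     * the cached segment is finalized.
     */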
1959static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1960                                            VirtioNetRscSeg *seg,
1961                                            const uint8_t *buf,
1962                                            VirtioNetRscUnit *n_unit)
1963{
1964    void *data;
1965    uint16_t o_ip_len;
1966    uint32_t nseq, oseq;
1967    VirtioNetRscUnit *o_unit;
1968
1969    o_unit = &seg->unit;
1970    o_ip_len = htons(*o_unit->ip_plen);
1971    nseq = htonl(n_unit->tcp->th_seq);
1972    oseq = htonl(o_unit->tcp->th_seq);
1973
1974    /* out of order or retransmitted. */
1975    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1976        chain->stat.data_out_of_win++;
1977        return RSC_FINAL;
1978    }
1979
1980    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1981    if (nseq == oseq) {
1982        if ((o_unit->payload == 0) && n_unit->payload) {
1983            /* From no payload to payload: the normal case, not a dup ack */
1984            chain->stat.data_after_pure_ack++;
1985            goto coalesce;
1986        } else {
1987            return virtio_net_rsc_handle_ack(chain, seg, buf,
1988                                             n_unit->tcp, o_unit->tcp);
1989        }
1990    } else if ((nseq - oseq) != o_unit->payload) {
1991        /* Not a consistent packet, out of order */
1992        chain->stat.data_out_of_order++;
1993        return RSC_FINAL;
1994    } else {
1995coalesce:
1996        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
1997            chain->stat.over_size++;
1998            return RSC_FINAL;
1999        }
2000
2001        /* The data follows on correctly; the payload length field differs
2002           between v4 and v6, so use the parsed value to record the new len */
2003        o_unit->payload += n_unit->payload; /* update new data len */
2004
2005        /* update field in ip header */
2006        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2007
2008        /* Carry over the new 'PUSH' flag: the WHQL test guide says 'PUSH'
2009           can be coalesced for a Windows guest, though this may change the
2010           behavior for a Linux guest (only if it uses the RSC feature). */
2011        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2012
2013        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2014        o_unit->tcp->th_win = n_unit->tcp->th_win;
2015
2016        memmove(seg->buf + seg->size, data, n_unit->payload);
2017        seg->size += n_unit->payload;
2018        seg->packets++;
2019        chain->stat.coalesced++;
2020        return RSC_COALESCE;
2021    }
2022}
2023
2024static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2025                                        VirtioNetRscSeg *seg,
2026                                        const uint8_t *buf, size_t size,
2027                                        VirtioNetRscUnit *unit)
2028{
2029    struct ip_header *ip1, *ip2;
2030
2031    ip1 = (struct ip_header *)(unit->ip);
2032    ip2 = (struct ip_header *)(seg->unit.ip);
2033    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2034        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2035        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2036        chain->stat.no_match++;
2037        return RSC_NO_MATCH;
2038    }
2039
2040    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2041}
2042
2043static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2044                                        VirtioNetRscSeg *seg,
2045                                        const uint8_t *buf, size_t size,
2046                                        VirtioNetRscUnit *unit)
2047{
2048    struct ip6_header *ip1, *ip2;
2049
2050    ip1 = (struct ip6_header *)(unit->ip);
2051    ip2 = (struct ip6_header *)(seg->unit.ip);
2052    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2053        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2054        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2055        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2056            chain->stat.no_match++;
2057            return RSC_NO_MATCH;
2058    }
2059
2060    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2061}
2062
2063/* Packets with 'SYN' bypass coalescing; packets with any other control
2064 * flag are sent only after the flow is drained, to prevent reordering */
2065static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2066                                         struct tcp_header *tcp)
2067{
2068    uint16_t tcp_hdr;
2069    uint16_t tcp_flag;
2070
2071    tcp_flag = htons(tcp->th_offset_flags);
2072    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2073    tcp_flag &= VIRTIO_NET_TCP_FLAG;
2074    if (tcp_flag & TH_SYN) {
2075        chain->stat.tcp_syn++;
2076        return RSC_BYPASS;
2077    }
2078
2079    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2080        chain->stat.tcp_ctrl_drain++;
2081        return RSC_FINAL;
2082    }
2083
2084    if (tcp_hdr > sizeof(struct tcp_header)) {
2085        chain->stat.tcp_all_opt++;
2086        return RSC_FINAL;
2087    }
2088
2089    return RSC_CANDIDATE;
2090}
2091
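    /*
     * Walk the chain looking for a cached segment of the same flow.
     * Depending on the match the packet is merged into a cached
     * segment, cached as a new segment, or delivered to the guest
     * along with the finalized cached segment.
     */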
2092static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2093                                         NetClientState *nc,
2094                                         const uint8_t *buf, size_t size,
2095                                         VirtioNetRscUnit *unit)
2096{
2097    int ret;
2098    VirtioNetRscSeg *seg, *nseg;
2099
2100    if (QTAILQ_EMPTY(&chain->buffers)) {
2101        chain->stat.empty_cache++;
2102        virtio_net_rsc_cache_buf(chain, nc, buf, size);
2103        timer_mod(chain->drain_timer,
2104              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2105        return size;
2106    }
2107
2108    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2109        if (chain->proto == ETH_P_IP) {
2110            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2111        } else {
2112            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2113        }
2114
2115        if (ret == RSC_FINAL) {
2116            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2117                /* Send failed */
2118                chain->stat.final_failed++;
2119                return 0;
2120            }
2121
2122            /* Send current packet */
2123            return virtio_net_do_receive(nc, buf, size);
2124        } else if (ret == RSC_NO_MATCH) {
2125            continue;
2126        } else {
2127            /* Coalesced; set the flag so the IPv4 checksum is recalculated */
2128            seg->is_coalesced = 1;
2129            return size;
2130        }
2131    }
2132
2133    chain->stat.no_match_cache++;
2134    virtio_net_rsc_cache_buf(chain, nc, buf, size);
2135    return size;
2136}
2137
2138/* Drain the data cached for a connection, to avoid out-of-order segments */
2139static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2140                                        NetClientState *nc,
2141                                        const uint8_t *buf, size_t size,
2142                                        uint16_t ip_start, uint16_t ip_size,
2143                                        uint16_t tcp_port)
2144{
2145    VirtioNetRscSeg *seg, *nseg;
2146    uint32_t ppair1, ppair2;
2147
2148    ppair1 = *(uint32_t *)(buf + tcp_port);
2149    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2150        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2151        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2152            || (ppair1 != ppair2)) {
2153            continue;
2154        }
2155        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2156            chain->stat.drain_failed++;
2157        }
2158
2159        break;
2160    }
2161
2162    return virtio_net_do_receive(nc, buf, size);
2163}
2164
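    /*
     * Check whether an IPv4 packet is a coalescing candidate: bypass
     * anything with IP options, possible fragments, ECN, a non-TCP
     * payload or an inconsistent length field.
     */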
2165static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2166                                            struct ip_header *ip,
2167                                            const uint8_t *buf, size_t size)
2168{
2169    uint16_t ip_len;
2170
2171    /* Not an ipv4 packet */
2172    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2173        chain->stat.ip_option++;
2174        return RSC_BYPASS;
2175    }
2176
2177    /* Don't handle packets with ip option */
2178    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2179        chain->stat.ip_option++;
2180        return RSC_BYPASS;
2181    }
2182
2183    if (ip->ip_p != IPPROTO_TCP) {
2184        chain->stat.bypass_not_tcp++;
2185        return RSC_BYPASS;
2186    }
2187
2188    /* Don't handle packets with ip fragment */
2189    if (!(htons(ip->ip_off) & IP_DF)) {
2190        chain->stat.ip_frag++;
2191        return RSC_BYPASS;
2192    }
2193
2194    /* Don't handle packets with ecn flag */
2195    if (IPTOS_ECN(ip->ip_tos)) {
2196        chain->stat.ip_ecn++;
2197        return RSC_BYPASS;
2198    }
2199
2200    ip_len = htons(ip->ip_len);
2201    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2202        || ip_len > (size - chain->n->guest_hdr_len -
2203                     sizeof(struct eth_header))) {
2204        chain->stat.ip_hacked++;
2205        return RSC_BYPASS;
2206    }
2207
2208    return RSC_CANDIDATE;
2209}
2210
2211static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2212                                      NetClientState *nc,
2213                                      const uint8_t *buf, size_t size)
2214{
2215    int32_t ret;
2216    uint16_t hdr_len;
2217    VirtioNetRscUnit unit;
2218
2219    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2220
2221    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2222        + sizeof(struct tcp_header))) {
2223        chain->stat.bypass_not_tcp++;
2224        return virtio_net_do_receive(nc, buf, size);
2225    }
2226
2227    virtio_net_rsc_extract_unit4(chain, buf, &unit);
2228    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2229        != RSC_CANDIDATE) {
2230        return virtio_net_do_receive(nc, buf, size);
2231    }
2232
2233    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2234    if (ret == RSC_BYPASS) {
2235        return virtio_net_do_receive(nc, buf, size);
2236    } else if (ret == RSC_FINAL) {
2237        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2238                ((hdr_len + sizeof(struct eth_header)) + 12),
2239                VIRTIO_NET_IP4_ADDR_SIZE,
2240                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2241    }
2242
2243    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2244}
2245
2246static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2247                                            struct ip6_header *ip6,
2248                                            const uint8_t *buf, size_t size)
2249{
2250    uint16_t ip_len;
2251
2252    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2253        != IP_HEADER_VERSION_6) {
2254        return RSC_BYPASS;
2255    }
2256
2257    /* This check rejects both extension headers (options) and non-TCP */
2258    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2259        chain->stat.bypass_not_tcp++;
2260        return RSC_BYPASS;
2261    }
2262
2263    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2264    if (ip_len < sizeof(struct tcp_header) ||
2265        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2266                  - sizeof(struct ip6_header))) {
2267        chain->stat.ip_hacked++;
2268        return RSC_BYPASS;
2269    }
2270
2271    /* Don't handle packets with ecn flag */
2272    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2273        chain->stat.ip_ecn++;
2274        return RSC_BYPASS;
2275    }
2276
2277    return RSC_CANDIDATE;
2278}
2279
2280static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2281                                      const uint8_t *buf, size_t size)
2282{
2283    int32_t ret;
2284    uint16_t hdr_len;
2285    VirtioNetRscChain *chain;
2286    VirtioNetRscUnit unit;
2287
2288    chain = (VirtioNetRscChain *)opq;
2289    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2290
2291    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2292        + sizeof(struct tcp_header))) {
2293        return virtio_net_do_receive(nc, buf, size);
2294    }
2295
2296    virtio_net_rsc_extract_unit6(chain, buf, &unit);
2297    if (virtio_net_rsc_sanity_check6(chain, unit.ip, buf, size)
2298        != RSC_CANDIDATE) {
2299        return virtio_net_do_receive(nc, buf, size);
2300    }
2301
2302    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2303    if (ret == RSC_BYPASS) {
2304        return virtio_net_do_receive(nc, buf, size);
2305    } else if (ret == RSC_FINAL) {
2306        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2307                ((hdr_len + sizeof(struct eth_header)) + 8),
2308                VIRTIO_NET_IP6_ADDR_SIZE,
2309                hdr_len + sizeof(struct eth_header)
2310                + sizeof(struct ip6_header));
2311    }
2312
2313    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2314}
2315
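    /*
     * Find the coalescing chain for 'proto', creating it on first
     * use.  Only ETH_P_IP and ETH_P_IPV6 have chains.
     */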
2316static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2317                                                      NetClientState *nc,
2318                                                      uint16_t proto)
2319{
2320    VirtioNetRscChain *chain;
2321
2322    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2323        return NULL;
2324    }
2325
2326    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2327        if (chain->proto == proto) {
2328            return chain;
2329        }
2330    }
2331
2332    chain = g_malloc(sizeof(*chain));
2333    chain->n = n;
2334    chain->proto = proto;
2335    if (proto == (uint16_t)ETH_P_IP) {
2336        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2337        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2338    } else {
2339        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2340        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2341    }
2342    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2343                                      virtio_net_rsc_purge, chain);
2344    memset(&chain->stat, 0, sizeof(chain->stat));
2345
2346    QTAILQ_INIT(&chain->buffers);
2347    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2348
2349    return chain;
2350}
2351
2352static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2353                                      const uint8_t *buf,
2354                                      size_t size)
2355{
2356    uint16_t proto;
2357    VirtioNetRscChain *chain;
2358    struct eth_header *eth;
2359    VirtIONet *n;
2360
2361    n = qemu_get_nic_opaque(nc);
2362    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2363        return virtio_net_do_receive(nc, buf, size);
2364    }
2365
2366    eth = (struct eth_header *)(buf + n->guest_hdr_len);
2367    proto = htons(eth->h_proto);
2368
2369    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2370    if (chain) {
2371        chain->stat.received++;
2372        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2373            return virtio_net_rsc_receive4(chain, nc, buf, size);
2374        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2375            return virtio_net_rsc_receive6(chain, nc, buf, size);
2376        }
2377    }
2378    return virtio_net_do_receive(nc, buf, size);
2379}
2380
2381static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2382                                  size_t size)
2383{
2384    VirtIONet *n = qemu_get_nic_opaque(nc);
2385    if ((n->rsc4_enabled || n->rsc6_enabled)) {
2386        return virtio_net_rsc_receive(nc, buf, size);
2387    } else {
2388        return virtio_net_do_receive(nc, buf, size);
2389    }
2390}
2391
2392static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2393
2394static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2395{
2396    VirtIONet *n = qemu_get_nic_opaque(nc);
2397    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2398    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2399
2400    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2401    virtio_notify(vdev, q->tx_vq);
2402
2403    g_free(q->async_tx.elem);
2404    q->async_tx.elem = NULL;
2405
2406    virtio_queue_set_notification(q->tx_vq, 1);
2407    virtio_net_flush_tx(q);
2408}
2409
2410/* TX */
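    /*
     * Flush up to tx_burst packets from the TX virtqueue to the peer,
     * byte-swapping or shortening the virtio header as needed.
     * Returns the number of packets sent, -EBUSY if the peer cannot
     * take more data right now (the completion callback restarts the
     * flush), or -EINVAL if the device is broken.
     */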
2411static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2412{
2413    VirtIONet *n = q->n;
2414    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2415    VirtQueueElement *elem;
2416    int32_t num_packets = 0;
2417    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2418    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2419        return num_packets;
2420    }
2421
2422    if (q->async_tx.elem) {
2423        virtio_queue_set_notification(q->tx_vq, 0);
2424        return num_packets;
2425    }
2426
2427    for (;;) {
2428        ssize_t ret;
2429        unsigned int out_num;
2430        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2431        struct virtio_net_hdr_mrg_rxbuf mhdr;
2432
2433        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2434        if (!elem) {
2435            break;
2436        }
2437
2438        out_num = elem->out_num;
2439        out_sg = elem->out_sg;
2440        if (out_num < 1) {
2441            virtio_error(vdev, "virtio-net header not in first element");
2442            virtqueue_detach_element(q->tx_vq, elem, 0);
2443            g_free(elem);
2444            return -EINVAL;
2445        }
2446
2447        if (n->has_vnet_hdr) {
2448            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2449                n->guest_hdr_len) {
2450                virtio_error(vdev, "virtio-net header incorrect");
2451                virtqueue_detach_element(q->tx_vq, elem, 0);
2452                g_free(elem);
2453                return -EINVAL;
2454            }
2455            if (n->needs_vnet_hdr_swap) {
2456                virtio_net_hdr_swap(vdev, (void *) &mhdr);
2457                sg2[0].iov_base = &mhdr;
2458                sg2[0].iov_len = n->guest_hdr_len;
2459                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2460                                   out_sg, out_num,
2461                                   n->guest_hdr_len, -1);
2462                if (out_num == VIRTQUEUE_MAX_SIZE) {
2463                    goto drop;
2464                }
2465                out_num += 1;
2466                out_sg = sg2;
2467            }
2468        }
2469        /*
2470         * If host wants to see the guest header as is, we can
2471         * pass it on unchanged. Otherwise, copy just the parts
2472         * that host is interested in.
2473         */
2474        assert(n->host_hdr_len <= n->guest_hdr_len);
2475        if (n->host_hdr_len != n->guest_hdr_len) {
2476            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2477                                       out_sg, out_num,
2478                                       0, n->host_hdr_len);
2479            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2480                             out_sg, out_num,
2481                             n->guest_hdr_len, -1);
2482            out_num = sg_num;
2483            out_sg = sg;
2484        }
2485
2486        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2487                                      out_sg, out_num, virtio_net_tx_complete);
2488        if (ret == 0) {
2489            virtio_queue_set_notification(q->tx_vq, 0);
2490            q->async_tx.elem = elem;
2491            return -EBUSY;
2492        }
2493
2494drop:
2495        virtqueue_push(q->tx_vq, elem, 0);
2496        virtio_notify(vdev, q->tx_vq);
2497        g_free(elem);
2498
2499        if (++num_packets >= n->tx_burst) {
2500            break;
2501        }
2502    }
2503    return num_packets;
2504}
2505
2506static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2507{
2508    VirtIONet *n = VIRTIO_NET(vdev);
2509    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2510
2511    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2512        virtio_net_drop_tx_queue_data(vdev, vq);
2513        return;
2514    }
2515
2516    /* This happens when device was stopped but VCPU wasn't. */
2517    if (!vdev->vm_running) {
2518        q->tx_waiting = 1;
2519        return;
2520    }
2521
2522    if (q->tx_waiting) {
2523        virtio_queue_set_notification(vq, 1);
2524        timer_del(q->tx_timer);
2525        q->tx_waiting = 0;
2526        if (virtio_net_flush_tx(q) == -EINVAL) {
2527            return;
2528        }
2529    } else {
2530        timer_mod(q->tx_timer,
2531                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2532        q->tx_waiting = 1;
2533        virtio_queue_set_notification(vq, 0);
2534    }
2535}
2536
2537static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2538{
2539    VirtIONet *n = VIRTIO_NET(vdev);
2540    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2541
2542    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2543        virtio_net_drop_tx_queue_data(vdev, vq);
2544        return;
2545    }
2546
2547    if (unlikely(q->tx_waiting)) {
2548        return;
2549    }
2550    q->tx_waiting = 1;
2551    /* This happens when device was stopped but VCPU wasn't. */
2552    if (!vdev->vm_running) {
2553        return;
2554    }
2555    virtio_queue_set_notification(vq, 0);
2556    qemu_bh_schedule(q->tx_bh);
2557}
2558
2559static void virtio_net_tx_timer(void *opaque)
2560{
2561    VirtIONetQueue *q = opaque;
2562    VirtIONet *n = q->n;
2563    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2564    /* This happens when device was stopped but the timer wasn't. */
2565    if (!vdev->vm_running) {
2566        /* Make sure tx waiting is set, so we'll run when restarted. */
2567        assert(q->tx_waiting);
2568        return;
2569    }
2570
2571    q->tx_waiting = 0;
2572
2573    /* Just in case the driver is not ready any more */
2574    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2575        return;
2576    }
2577
2578    virtio_queue_set_notification(q->tx_vq, 1);
2579    virtio_net_flush_tx(q);
2580}
2581
2582static void virtio_net_tx_bh(void *opaque)
2583{
2584    VirtIONetQueue *q = opaque;
2585    VirtIONet *n = q->n;
2586    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2587    int32_t ret;
2588
2589    /* This happens when device was stopped but BH wasn't. */
2590    if (!vdev->vm_running) {
2591        /* Make sure tx waiting is set, so we'll run when restarted. */
2592        assert(q->tx_waiting);
2593        return;
2594    }
2595
2596    q->tx_waiting = 0;
2597
2598    /* Just in case the driver is not ready any more */
2599    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2600        return;
2601    }
2602
2603    ret = virtio_net_flush_tx(q);
2604    if (ret == -EBUSY || ret == -EINVAL) {
2605        return; /* Notification re-enable handled by tx_complete or device
2606                 * broken */
2607    }
2608
2609    /* If we flush a full burst of packets, assume there are
2610     * more coming and immediately reschedule */
2611    if (ret >= n->tx_burst) {
2612        qemu_bh_schedule(q->tx_bh);
2613        q->tx_waiting = 1;
2614        return;
2615    }
2616
2617    /* If less than a full burst, re-enable notification and flush
2618     * anything that may have come in while we weren't looking.  If
2619     * we find something, assume the guest is still active and reschedule */
2620    virtio_queue_set_notification(q->tx_vq, 1);
2621    ret = virtio_net_flush_tx(q);
2622    if (ret == -EINVAL) {
2623        return;
2624    } else if (ret > 0) {
2625        virtio_queue_set_notification(q->tx_vq, 0);
2626        qemu_bh_schedule(q->tx_bh);
2627        q->tx_waiting = 1;
2628    }
2629}
2630
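    /*
     * Allocate the rx/tx virtqueue pair for queue 'index' and attach
     * the configured TX mitigation mechanism (timer or bottom half).
     */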
2631static void virtio_net_add_queue(VirtIONet *n, int index)
2632{
2633    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2634
2635    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2636                                           virtio_net_handle_rx);
2637
2638    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2639        n->vqs[index].tx_vq =
2640            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2641                             virtio_net_handle_tx_timer);
2642        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2643                                              virtio_net_tx_timer,
2644                                              &n->vqs[index]);
2645    } else {
2646        n->vqs[index].tx_vq =
2647            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2648                             virtio_net_handle_tx_bh);
2649        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2650    }
2651
2652    n->vqs[index].tx_waiting = 0;
2653    n->vqs[index].n = n;
2654}
2655
2656static void virtio_net_del_queue(VirtIONet *n, int index)
2657{
2658    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2659    VirtIONetQueue *q = &n->vqs[index];
2660    NetClientState *nc = qemu_get_subqueue(n->nic, index);
2661
2662    qemu_purge_queued_packets(nc);
2663
2664    virtio_del_queue(vdev, index * 2);
2665    if (q->tx_timer) {
2666        timer_del(q->tx_timer);
2667        timer_free(q->tx_timer);
2668        q->tx_timer = NULL;
2669    } else {
2670        qemu_bh_delete(q->tx_bh);
2671        q->tx_bh = NULL;
2672    }
2673    q->tx_waiting = 0;
2674    virtio_del_queue(vdev, index * 2 + 1);
2675}
2676
2677static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2678{
2679    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2680    int old_num_queues = virtio_get_num_queues(vdev);
2681    int new_num_queues = new_max_queues * 2 + 1;
2682    int i;
2683
2684    assert(old_num_queues >= 3);
2685    assert(old_num_queues % 2 == 1);
2686
2687    if (old_num_queues == new_num_queues) {
2688        return;
2689    }
2690
2691    /*
2692     * We always need to remove and add ctrl vq if
2693     * old_num_queues != new_num_queues. Remove ctrl_vq first,
2694     * and then we only enter one of the following two loops.
2695     */
2696    virtio_del_queue(vdev, old_num_queues - 1);
2697
2698    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2699        /* new_num_queues < old_num_queues */
2700        virtio_net_del_queue(n, i / 2);
2701    }
2702
2703    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2704        /* new_num_queues > old_num_queues */
2705        virtio_net_add_queue(n, i / 2);
2706    }
2707
2708    /* add ctrl_vq last */
2709    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2710}
2711
2712static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2713{
2714    int max = multiqueue ? n->max_queues : 1;
2715
2716    n->multiqueue = multiqueue;
2717    virtio_net_change_num_queues(n, max);
2718
2719    virtio_net_set_queues(n);
2720}
2721
2722static int virtio_net_post_load_device(void *opaque, int version_id)
2723{
2724    VirtIONet *n = opaque;
2725    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2726    int i, link_down;
2727
2728    trace_virtio_net_post_load_device();
2729    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2730                               virtio_vdev_has_feature(vdev,
2731                                                       VIRTIO_F_VERSION_1),
2732                               virtio_vdev_has_feature(vdev,
2733                                                       VIRTIO_NET_F_HASH_REPORT));
2734
2735    /* MAC_TABLE_ENTRIES may be different from the saved image */
2736    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2737        n->mac_table.in_use = 0;
2738    }
2739
2740    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2741        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2742    }
2743
2744    /*
2745     * curr_guest_offloads will be later overwritten by the
2746     * virtio_set_features_nocheck call done from the virtio_load.
2747     * Here we make sure it is preserved and restored accordingly
2748     * in the virtio_net_post_load_virtio callback.
2749     */
2750    n->saved_guest_offloads = n->curr_guest_offloads;
2751
2752    virtio_net_set_queues(n);
2753
2754    /* Find the first multicast entry in the saved MAC filter */
2755    for (i = 0; i < n->mac_table.in_use; i++) {
2756        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2757            break;
2758        }
2759    }
2760    n->mac_table.first_multi = i;
2761
2762    /* nc.link_down can't be migrated, so infer link_down according
2763     * to link status bit in n->status */
2764    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2765    for (i = 0; i < n->max_queues; i++) {
2766        qemu_get_subqueue(n->nic, i)->link_down = link_down;
2767    }
2768
2769    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2770        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2771        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2772                                  QEMU_CLOCK_VIRTUAL,
2773                                  virtio_net_announce_timer, n);
2774        if (n->announce_timer.round) {
2775            timer_mod(n->announce_timer.tm,
2776                      qemu_clock_get_ms(n->announce_timer.type));
2777        } else {
2778            qemu_announce_timer_del(&n->announce_timer, false);
2779        }
2780    }
2781
2782    if (n->rss_data.enabled) {
2783        trace_virtio_net_rss_enable(n->rss_data.hash_types,
2784                                    n->rss_data.indirections_len,
2785                                    sizeof(n->rss_data.key));
2786    } else {
2787        trace_virtio_net_rss_disable();
2788    }
2789    return 0;
2790}
2791
2792static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2793{
2794    VirtIONet *n = VIRTIO_NET(vdev);
2795    /*
2796     * The actual needed state is now in saved_guest_offloads,
2797     * see virtio_net_post_load_device for detail.
2798     * Restore it back and apply the desired offloads.
2799     */
2800    n->curr_guest_offloads = n->saved_guest_offloads;
2801    if (peer_has_vnet_hdr(n)) {
2802        virtio_net_apply_guest_offloads(n);
2803    }
2804
2805    return 0;
2806}
2807
2808/* tx_waiting field of a VirtIONetQueue */
2809static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2810    .name = "virtio-net-queue-tx_waiting",
2811    .fields = (VMStateField[]) {
2812        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2813        VMSTATE_END_OF_LIST()
2814   },
2815};
2816
2817static bool max_queues_gt_1(void *opaque, int version_id)
2818{
2819    return VIRTIO_NET(opaque)->max_queues > 1;
2820}
2821
2822static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2823{
2824    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2825                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2826}
2827
2828static bool mac_table_fits(void *opaque, int version_id)
2829{
2830    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2831}
2832
2833static bool mac_table_doesnt_fit(void *opaque, int version_id)
2834{
2835    return !mac_table_fits(opaque, version_id);
2836}
2837
2838/* This temporary type is shared by all the WITH_TMP methods
2839 * although only some fields are used by each.
2840 */
2841struct VirtIONetMigTmp {
2842    VirtIONet      *parent;
2843    VirtIONetQueue *vqs_1;
2844    uint16_t        curr_queues_1;
2845    uint8_t         has_ufo;
2846    uint32_t        has_vnet_hdr;
2847};
2848
2849/* The 2nd and subsequent tx_waiting flags are loaded later than
2850 * the 1st entry in the queues and only if there's more than one
2851 * entry.  We use the tmp mechanism to calculate a temporary
2852 * pointer and count and also validate the count.
2853 */
2854
2855static int virtio_net_tx_waiting_pre_save(void *opaque)
2856{
2857    struct VirtIONetMigTmp *tmp = opaque;
2858
2859    tmp->vqs_1 = tmp->parent->vqs + 1;
2860    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2861    if (tmp->parent->curr_queues == 0) {
2862        tmp->curr_queues_1 = 0;
2863    }
2864
2865    return 0;
2866}
2867
2868static int virtio_net_tx_waiting_pre_load(void *opaque)
2869{
2870    struct VirtIONetMigTmp *tmp = opaque;
2871
2872    /* Reuse the pointer setup from save */
2873    virtio_net_tx_waiting_pre_save(opaque);
2874
2875    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2876        error_report("virtio-net: curr_queues %x > max_queues %x",
2877            tmp->parent->curr_queues, tmp->parent->max_queues);
2878
2879        return -EINVAL;
2880    }
2881
2882    return 0; /* all good */
2883}
2884
2885static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2886    .name      = "virtio-net-tx_waiting",
2887    .pre_load  = virtio_net_tx_waiting_pre_load,
2888    .pre_save  = virtio_net_tx_waiting_pre_save,
2889    .fields    = (VMStateField[]) {
2890        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2891                                     curr_queues_1,
2892                                     vmstate_virtio_net_queue_tx_waiting,
2893                                     struct VirtIONetQueue),
2894        VMSTATE_END_OF_LIST()
2895    },
2896};
2897
2898/* the 'has_ufo' flag is just tested; if the incoming stream has the
2899 * flag set we need to check that we have it
2900 */
2901static int virtio_net_ufo_post_load(void *opaque, int version_id)
2902{
2903    struct VirtIONetMigTmp *tmp = opaque;
2904
2905    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2906        error_report("virtio-net: saved image requires TUN_F_UFO support");
2907        return -EINVAL;
2908    }
2909
2910    return 0;
2911}
2912
2913static int virtio_net_ufo_pre_save(void *opaque)
2914{
2915    struct VirtIONetMigTmp *tmp = opaque;
2916
2917    tmp->has_ufo = tmp->parent->has_ufo;
2918
2919    return 0;
2920}
2921
2922static const VMStateDescription vmstate_virtio_net_has_ufo = {
2923    .name      = "virtio-net-ufo",
2924    .post_load = virtio_net_ufo_post_load,
2925    .pre_save  = virtio_net_ufo_pre_save,
2926    .fields    = (VMStateField[]) {
2927        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2928        VMSTATE_END_OF_LIST()
2929    },
2930};
2931
2932/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2933 * flag set we need to check that we have it
2934 */
2935static int virtio_net_vnet_post_load(void *opaque, int version_id)
2936{
2937    struct VirtIONetMigTmp *tmp = opaque;
2938
2939    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2940        error_report("virtio-net: saved image requires vnet_hdr=on");
2941        return -EINVAL;
2942    }
2943
2944    return 0;
2945}
2946
2947static int virtio_net_vnet_pre_save(void *opaque)
2948{
2949    struct VirtIONetMigTmp *tmp = opaque;
2950
2951    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2952
2953    return 0;
2954}
2955
2956static const VMStateDescription vmstate_virtio_net_has_vnet = {
2957    .name      = "virtio-net-vnet",
2958    .post_load = virtio_net_vnet_post_load,
2959    .pre_save  = virtio_net_vnet_pre_save,
2960    .fields    = (VMStateField[]) {
2961        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2962        VMSTATE_END_OF_LIST()
2963    },
2964};
2965
2966static bool virtio_net_rss_needed(void *opaque)
2967{
2968    return VIRTIO_NET(opaque)->rss_data.enabled;
2969}
2970
2971static const VMStateDescription vmstate_virtio_net_rss = {
2972    .name      = "virtio-net-device/rss",
2973    .version_id = 1,
2974    .minimum_version_id = 1,
2975    .needed = virtio_net_rss_needed,
2976    .fields = (VMStateField[]) {
2977        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
2978        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
2979        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
2980        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
2981        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
2982        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
2983        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
2984                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
2985        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
2986                                    rss_data.indirections_len, 0,
2987                                    vmstate_info_uint16, uint16_t),
2988        VMSTATE_END_OF_LIST()
2989    },
2990};
2991
2992static const VMStateDescription vmstate_virtio_net_device = {
2993    .name = "virtio-net-device",
2994    .version_id = VIRTIO_NET_VM_VERSION,
2995    .minimum_version_id = VIRTIO_NET_VM_VERSION,
2996    .post_load = virtio_net_post_load_device,
2997    .fields = (VMStateField[]) {
2998        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
2999        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3000                               vmstate_virtio_net_queue_tx_waiting,
3001                               VirtIONetQueue),
3002        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3003        VMSTATE_UINT16(status, VirtIONet),
3004        VMSTATE_UINT8(promisc, VirtIONet),
3005        VMSTATE_UINT8(allmulti, VirtIONet),
3006        VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3007
3008        /* Guarded pair: if it fits we load it, else we throw it away
3009         * - can happen if the source has a larger MAC table; post-load
3010         *   sets flags in this case.
3011         */
3012        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3013                                0, mac_table_fits, mac_table.in_use,
3014                                 ETH_ALEN),
3015        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3016                                     mac_table.in_use, ETH_ALEN),
3017
3018        /* Note: This is an array of uint32's that's always been saved as a
3019         * buffer; hold onto your endiannesses; it's actually used as a bitmap
3020         * but based on the uint.
3021         */
3022        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3023        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3024                         vmstate_virtio_net_has_vnet),
3025        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3026        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3027        VMSTATE_UINT8(alluni, VirtIONet),
3028        VMSTATE_UINT8(nomulti, VirtIONet),
3029        VMSTATE_UINT8(nouni, VirtIONet),
3030        VMSTATE_UINT8(nobcast, VirtIONet),
3031        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3032                         vmstate_virtio_net_has_ufo),
3033        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
3034                            vmstate_info_uint16_equal, uint16_t),
3035        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
3036        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3037                         vmstate_virtio_net_tx_waiting),
3038        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3039                            has_ctrl_guest_offloads),
3040        VMSTATE_END_OF_LIST()
3041    },
3042    .subsections = (const VMStateDescription * []) {
3043        &vmstate_virtio_net_rss,
3044        NULL
3045    }
3046};
3047
3048static NetClientInfo net_virtio_info = {
3049    .type = NET_CLIENT_DRIVER_NIC,
3050    .size = sizeof(NICState),
3051    .can_receive = virtio_net_can_receive,
3052    .receive = virtio_net_receive,
3053    .link_status_changed = virtio_net_set_link_status,
3054    .query_rx_filter = virtio_net_query_rxfilter,
3055    .announce = virtio_net_announce,
3056};
3057
3058static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3059{
3060    VirtIONet *n = VIRTIO_NET(vdev);
3061    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3062    assert(n->vhost_started);
3063    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3064}
3065
3066static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3067                                           bool mask)
3068{
3069    VirtIONet *n = VIRTIO_NET(vdev);
3070    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3071    assert(n->vhost_started);
3072    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3073                             vdev, idx, mask);
3074}
3075
3076static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3077{
3078    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3079
3080    n->config_size = virtio_feature_get_config_size(feature_sizes,
3081                                                    host_features);
3082}
3083
3084void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3085                                   const char *type)
3086{
3087    /*
3088     * The name can be NULL; the netclient name will then be type.x.
3089     */
3090    assert(type != NULL);
3091
3092    g_free(n->netclient_name);
3093    g_free(n->netclient_type);
3094    n->netclient_name = g_strdup(name);
3095    n->netclient_type = g_strdup(type);
3096}
3097
3098static bool failover_unplug_primary(VirtIONet *n)
3099{
3100    HotplugHandler *hotplug_ctrl;
3101    PCIDevice *pci_dev;
3102    Error *err = NULL;
3103
3104    hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3105    if (hotplug_ctrl) {
3106        pci_dev = PCI_DEVICE(n->primary_dev);
3107        pci_dev->partially_hotplugged = true;
3108        hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
3109        if (err) {
3110            error_report_err(err);
3111            return false;
3112        }
3113    } else {
3114        return false;
3115    }
3116    return true;
3117}
3118
3119static bool failover_replug_primary(VirtIONet *n, Error **errp)
3120{
3121    Error *err = NULL;
3122    HotplugHandler *hotplug_ctrl;
3123    PCIDevice *pdev = PCI_DEVICE(n->primary_dev);
3124
3125    if (!pdev->partially_hotplugged) {
3126        return true;
3127    }
3128    if (!n->primary_device_opts) {
3129        n->primary_device_opts = qemu_opts_from_qdict(
3130                qemu_find_opts("device"),
3131                n->primary_device_dict, errp);
3132        if (!n->primary_device_opts) {
3133            return false;
3134        }
3135    }
3136    n->primary_bus = n->primary_dev->parent_bus;
3137    if (!n->primary_bus) {
3138        error_setg(errp, "virtio_net: couldn't find primary bus");
3139        return false;
3140    }
3141    qdev_set_parent_bus(n->primary_dev, n->primary_bus, &error_abort);
3142    n->primary_should_be_hidden = false;
3143    if (!qemu_opt_set_bool(n->primary_device_opts,
3144                           "partially_hotplugged", true, errp)) {
3145        return false;
3146    }
3147    hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3148    if (hotplug_ctrl) {
3149        hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err);
3150        if (err) {
3151            goto out;
3152        }
3153        hotplug_handler_plug(hotplug_ctrl, n->primary_dev, &err);
3154    }
3155
3156out:
3157    error_propagate(errp, err);
3158    return !err;
3159}
3160
static void virtio_net_handle_migration_primary(VirtIONet *n,
                                                MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;

    should_be_hidden = qatomic_read(&n->primary_should_be_hidden);

    if (!n->primary_dev) {
        n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
        if (!n->primary_dev) {
            return;
        }
    }

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n)) {
            vmstate_unregister(VMSTATE_IF(n->primary_dev),
                    qdev_get_vmsd(n->primary_dev),
                    n->primary_dev);
            qapi_event_send_unplug_primary(n->primary_device_id);
            qatomic_set(&n->primary_should_be_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device; let's plug it back. */
        if (!failover_replug_primary(n, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}

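/* Migration state change hook; dispatches to the failover logic above. */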
static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
{
    MigrationState *s = data;
    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
    virtio_net_handle_migration_primary(n, s);
}

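/*
 * DeviceListener callback used to decide whether a device being
 * created is the failover primary for this virtio-net instance.
 * Returns 1 if it is and should be hidden, 0 if it is and should be
 * plugged normally, and -1 if it is not our primary device.
 */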
static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
            QemuOpts *device_opts)
{
    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
    bool match_found = false;
    bool hide = false;

    if (!device_opts) {
        return -1;
    }
    n->primary_device_dict = qemu_opts_to_qdict(device_opts,
            n->primary_device_dict);
    if (n->primary_device_dict) {
        g_free(n->standby_id);
        n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
                    "failover_pair_id"));
    }
    if (g_strcmp0(n->standby_id, n->netclient_name) == 0) {
        match_found = true;
    } else {
        match_found = false;
        hide = false;
        g_free(n->standby_id);
        n->standby_id = NULL; /* avoid a dangling pointer on the next call */
        qobject_unref(n->primary_device_dict); /* don't leak the dict */
        n->primary_device_dict = NULL;
        goto out;
    }

    n->primary_device_opts = device_opts;

    /* primary_should_be_hidden is set during feature negotiation */
    hide = qatomic_read(&n->primary_should_be_hidden);

    if (n->primary_device_dict) {
        g_free(n->primary_device_id);
        n->primary_device_id = g_strdup(qdict_get_try_str(
                    n->primary_device_dict, "id"));
        if (!n->primary_device_id) {
            warn_report("primary_device_id not set");
        }
    }

out:
    if (match_found && hide) {
        return 1;
    } else if (match_found && !hide) {
        return 0;
    } else {
        return -1;
    }
}

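/* QOM realize: validate properties, then create the virtqueues and NIC. */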
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    if (n->failover) {
        n->primary_listener.should_be_hidden =
            virtio_net_primary_should_be_hidden;
        qatomic_set(&n->primary_should_be_hidden, true);
        device_listener_register(&n->primary_listener);
        n->migration_state.notify = virtio_net_migration_state_notifier;
        add_migration_state_change_notifier(&n->migration_state);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on the RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer not greater than %d",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * This happens when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        struct virtio_net_config netcfg = {};
        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
        vhost_net_set_config(get_vhost_net(nc->peer),
            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
    }
    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt, false);
}

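/* QOM unrealize: stop the backend and free everything realize set up. */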
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop the vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        device_listener_unregister(&n->primary_listener);
        g_free(n->primary_device_id);
        g_free(n->standby_id);
        qobject_unref(n->primary_device_dict);
        n->primary_device_dict = NULL;
    }

    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }
    /* Also delete the control vq. */
    virtio_del_queue(vdev, max_queues * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * It can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));
}

static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point the backend must be stopped; otherwise it might
     * keep writing to memory. */
    assert(!n->vhost_started);

    return 0;
}

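/*
 * Report whether the failover primary device still has an unplug
 * event pending; only meaningful once VIRTIO_NET_F_STANDBY has been
 * negotiated.
 */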
static bool primary_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(vdev);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
        return false;
    }
    return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
}

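/* Trampoline for the vmstate below: defer to the device class hook. */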
static bool dev_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    return vdc->primary_unplug_pending(dev);
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};

static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                    VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                    VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                    VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                    VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_net_properties);
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->post_load = virtio_net_post_load_virtio;
    vdc->vmsd = &vmstate_virtio_net_device;
    vdc->primary_unplug_pending = primary_unplug_pending;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)